[https://nvbugs/5556020][fix] test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3 dimension mismatch (#8517)

Signed-off-by: qgai <qgai@nvidia.com>
This commit is contained in:
sunnyqgg 2025-10-22 09:58:22 +08:00 committed by GitHub
parent 50d4e5bc06
commit 90080e0e09
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 2 additions and 3 deletions

View File

@@ -506,9 +506,9 @@ class ModelDrafter(Drafter):
continue
# Get the index of the draft/target tokens in the device tensor
-draft_idx = req_idx if self.use_static_draft_loop else request.py_batch_idx
+draft_idx = req_idx if self.use_static_draft_loop else request.py_seq_slot
target_idx = req_id_to_old_request[
-request.py_request_id].py_batch_idx
+request.py_request_id].py_seq_slot
target_inputs.new_tokens[draft_position + 1:draft_position +
draft_length + 1, target_idx,
0] = draft_tensors[0:draft_length,

View File

@@ -452,7 +452,6 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
task = GSM8K(self.MODEL_NAME)
task.evaluate(llm)
-@pytest.mark.skip(reason="https://nvbugs/5556020")
@pytest.mark.skip_less_device(2)
@skip_pre_hopper
@parametrize_with_ids("overlap_scheduler", [True, False])