[https://nvbugs/5556020][fix] test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3 dimension mismatch (#8517)

Signed-off-by: qgai <qgai@nvidia.com>
2026-01-14 06:27:45 +08:00 · 2025-10-22 09:58:22 +08:00 · 2025-10-22 09:58:22 +08:00 · 90080e0e09
commit 90080e0e09
parent 50d4e5bc06
2 changed files with 2 additions and 3 deletions
--- a/tensorrt_llm/_torch/speculative/model_drafter.py
+++ b/tensorrt_llm/_torch/speculative/model_drafter.py
@@ -506,9 +506,9 @@ class ModelDrafter(Drafter):
                    continue

                # Get the index of the draft/target tokens in the device tensor
-                draft_idx = req_idx if self.use_static_draft_loop else request.py_batch_idx
+                draft_idx = req_idx if self.use_static_draft_loop else request.py_seq_slot
                target_idx = req_id_to_old_request[
-                    request.py_request_id].py_batch_idx
+                    request.py_request_id].py_seq_slot
                target_inputs.new_tokens[draft_position + 1:draft_position +
                                         draft_length + 1, target_idx,
                                         0] = draft_tensors[0:draft_length,
--- a/tests/integration/defs/accuracy/test_disaggregated_serving.py
+++ b/tests/integration/defs/accuracy/test_disaggregated_serving.py
@@ -452,7 +452,6 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
            task = GSM8K(self.MODEL_NAME)
            task.evaluate(llm)

-    @pytest.mark.skip(reason="https://nvbugs/5556020")
    @pytest.mark.skip_less_device(2)
    @skip_pre_hopper
    @parametrize_with_ids("overlap_scheduler", [True, False])