mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[https://nvbugs/5556020][fix] test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3 dimension mismatch (#8517)
Signed-off-by: qgai <qgai@nvidia.com>
This commit is contained in:
parent
50d4e5bc06
commit
90080e0e09
@@ -506,9 +506,9 @@ class ModelDrafter(Drafter):
 continue
 
 # Get the index of the draft/target tokens in the device tensor
-draft_idx = req_idx if self.use_static_draft_loop else request.py_batch_idx
+draft_idx = req_idx if self.use_static_draft_loop else request.py_seq_slot
 target_idx = req_id_to_old_request[
-request.py_request_id].py_batch_idx
+request.py_request_id].py_seq_slot
 target_inputs.new_tokens[draft_position + 1:draft_position +
 draft_length + 1, target_idx,
 0] = draft_tensors[0:draft_length,
@@ -452,7 +452,6 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
 task = GSM8K(self.MODEL_NAME)
 task.evaluate(llm)
 
-@pytest.mark.skip(reason="https://nvbugs/5556020")
 @pytest.mark.skip_less_device(2)
 @skip_pre_hopper
 @parametrize_with_ids("overlap_scheduler", [True, False])
Loading…
Reference in New Issue
Block a user