mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[https://nvbugs/5433545][fix] TestPhi4MiniInstruct::test_auto_dtype - Use max_seq_len=4096 to fallback to the short RoPE factor (#6895)
Signed-off-by: Michal Guzek <mguzek@nvidia.com>
This commit is contained in:
parent
4b6cca0662
commit
7ea53ff516
@@ -2334,7 +2334,7 @@ class TestPhi4MiniInstruct(LlmapiAccuracyTestHarness):
|
||||
MODEL_PATH = f"{llm_models_root()}/Phi-4-mini-instruct"
|
||||
|
||||
def test_auto_dtype(self):
|
||||
- with LLM(self.MODEL_PATH) as llm:
|
||||
+ with LLM(self.MODEL_PATH, max_seq_len=4096) as llm:
|
||||
task = CnnDailymail(self.MODEL_NAME)
|
||||
task.evaluate(llm)
|
||||
task = MMLU(self.MODEL_NAME)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user