From d6322f70b757831ddb890222c2de100128e20539 Mon Sep 17 00:00:00 2001 From: Naveassaf <55059536+Naveassaf@users.noreply.github.com> Date: Sun, 17 Aug 2025 20:38:36 +0300 Subject: [PATCH] [https://nvbugs/5451028][fix] Constrain NemotronSuper test parameters to prevent OOMs (#6970) Signed-off-by: Nave Assaf --- .../defs/accuracy/test_llm_api_pytorch.py | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py index f847104866..649e826207 100644 --- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py +++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py @@ -1770,29 +1770,28 @@ class TestLlama3_3NemotronSuper49Bv1(LlmapiAccuracyTestHarness): @pytest.mark.skip_less_device(2) @pytest.mark.skip_less_device_memory(80000) def test_auto_dtype_tp2(self): - with LLM(self.MODEL_PATH, tensor_parallel_size=2) as llm: + with LLM(self.MODEL_PATH, + tensor_parallel_size=2, + max_seq_len=8192, + max_batch_size=64) as llm: + # Run only one eval as maximal BS is not large task = MMLU(self.MODEL_NAME) task.evaluate(llm) - task = GSM8K(self.MODEL_NAME) - task.evaluate(llm) - task = GPQADiamond(self.MODEL_NAME) - task.evaluate(llm, - extra_evaluator_kwargs=dict(apply_chat_template=True)) @skip_pre_hopper @pytest.mark.skip_less_device(2) @pytest.mark.skip_device_not_contain(["H100", "B200"]) def test_fp8_prequantized_tp2(self): model_path = f"{llm_models_root()}/nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8" - with LLM(model_path, tensor_parallel_size=2) as llm: + with LLM(model_path, + tensor_parallel_size=2, + max_seq_len=8192, + max_batch_size=64) as llm: assert llm.args.quant_config.quant_algo == QuantAlgo.FP8 + + # Run only one eval as maximal BS is not large task = MMLU(self.MODEL_NAME) task.evaluate(llm) - task = GSM8K(self.MODEL_NAME) - task.evaluate(llm) - task = GPQADiamond(self.MODEL_NAME) - task.evaluate(llm, - extra_evaluator_kwargs=dict(apply_chat_template=True)) class TestLlama3_1NemotronNano8Bv1(LlmapiAccuracyTestHarness):