[https://nvbugs/5451028][fix] Constrain NemotronSuper test parameters to prevent OOMs (#6970)

Signed-off-by: Nave Assaf <nassaf@nvidia.com>
Author: Naveassaf, committed via GitHub on 2025-08-17 20:38:36 +03:00
parent 3a49b47081
commit d6322f70b7


@@ -1770,29 +1770,28 @@ class TestLlama3_3NemotronSuper49Bv1(LlmapiAccuracyTestHarness):
     @pytest.mark.skip_less_device(2)
     @pytest.mark.skip_less_device_memory(80000)
     def test_auto_dtype_tp2(self):
-        with LLM(self.MODEL_PATH, tensor_parallel_size=2) as llm:
-            task = MMLU(self.MODEL_NAME)
-            task.evaluate(llm)
-            task = GSM8K(self.MODEL_NAME)
-            task.evaluate(llm)
+        with LLM(self.MODEL_PATH,
+                 tensor_parallel_size=2,
+                 max_seq_len=8192,
+                 max_batch_size=64) as llm:
+            # Run only one eval as maximal BS is not large
             task = GPQADiamond(self.MODEL_NAME)
             task.evaluate(llm,
                           extra_evaluator_kwargs=dict(apply_chat_template=True))
 
     @skip_pre_hopper
     @pytest.mark.skip_less_device(2)
     @pytest.mark.skip_device_not_contain(["H100", "B200"])
     def test_fp8_prequantized_tp2(self):
         model_path = f"{llm_models_root()}/nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8"
-        with LLM(model_path, tensor_parallel_size=2) as llm:
+        with LLM(model_path,
+                 tensor_parallel_size=2,
+                 max_seq_len=8192,
+                 max_batch_size=64) as llm:
             assert llm.args.quant_config.quant_algo == QuantAlgo.FP8
-            task = MMLU(self.MODEL_NAME)
-            task.evaluate(llm)
-            task = GSM8K(self.MODEL_NAME)
-            task.evaluate(llm)
+            # Run only one eval as maximal BS is not large
             task = GPQADiamond(self.MODEL_NAME)
             task.evaluate(llm,
                           extra_evaluator_kwargs=dict(apply_chat_template=True))
 
 
 class TestLlama3_1NemotronNano8Bv1(LlmapiAccuracyTestHarness):
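
For context on why the new limits help: a major driver of per-request GPU memory is the KV cache, whose worst case grows linearly with max_batch_size × max_seq_len. The sketch below is a rough back-of-the-envelope estimate only; the layer/head/width numbers are hypothetical placeholders (not the actual Llama-3.3-Nemotron-Super-49B-v1 configuration, which is a NAS-pruned, non-uniform architecture), and it ignores weights, activations, and allocator overhead.

# Rough KV-cache upper bound implied by LLM(max_batch_size=..., max_seq_len=...).
# All model dimensions below are made-up placeholders, used only to show scale.

def kv_cache_bytes(max_batch_size, max_seq_len, num_layers, num_kv_heads,
                   head_dim, bytes_per_elem=2, tp_size=1):
    # K and V tensors per layer, per token, sharded across tensor-parallel ranks.
    per_token = 2 * num_layers * num_kv_heads * head_dim * bytes_per_elem
    return max_batch_size * max_seq_len * per_token // tp_size

# With the test's limits (max_batch_size=64, max_seq_len=8192, TP=2) and
# hypothetical dimensions, the worst-case per-GPU KV cache is already sizeable:
print(kv_cache_bytes(64, 8192, num_layers=64, num_kv_heads=8, head_dim=128,
                     tp_size=2) / 2**30, "GiB")  # -> 64.0 GiB

TensorRT-LLM normally sizes its KV-cache pool from free GPU memory rather than from this worst case, so the number above is only meant to illustrate how leaving max_seq_len and max_batch_size unconstrained can push a 2×80GB TP2 run past its memory budget, which appears to be the OOM this commit addresses.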