Mirror of https://github.com/NVIDIA/TensorRT-LLM.git, synced 2026-01-14 06:27:45 +08:00
[https://nvbugs/5451028][fix] Constrain NemotronSuper test parameters to prevent OOMs (#6970)
Signed-off-by: Nave Assaf <nassaf@nvidia.com>
commit d6322f70b7
parent 3a49b47081
@@ -1770,29 +1770,28 @@ class TestLlama3_3NemotronSuper49Bv1(LlmapiAccuracyTestHarness):
     @pytest.mark.skip_less_device(2)
     @pytest.mark.skip_less_device_memory(80000)
     def test_auto_dtype_tp2(self):
-        with LLM(self.MODEL_PATH, tensor_parallel_size=2) as llm:
+        with LLM(self.MODEL_PATH,
+                 tensor_parallel_size=2,
+                 max_seq_len=8192,
+                 max_batch_size=64) as llm:
+            # Run only one eval as maximal BS is not large
             task = MMLU(self.MODEL_NAME)
             task.evaluate(llm)
-            task = GSM8K(self.MODEL_NAME)
-            task.evaluate(llm)
-            task = GPQADiamond(self.MODEL_NAME)
-            task.evaluate(llm,
-                          extra_evaluator_kwargs=dict(apply_chat_template=True))
 
     @skip_pre_hopper
     @pytest.mark.skip_less_device(2)
     @pytest.mark.skip_device_not_contain(["H100", "B200"])
     def test_fp8_prequantized_tp2(self):
         model_path = f"{llm_models_root()}/nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8"
-        with LLM(model_path, tensor_parallel_size=2) as llm:
+        with LLM(model_path,
+                 tensor_parallel_size=2,
+                 max_seq_len=8192,
+                 max_batch_size=64) as llm:
             assert llm.args.quant_config.quant_algo == QuantAlgo.FP8
+
+            # Run only one eval as maximal BS is not large
             task = MMLU(self.MODEL_NAME)
             task.evaluate(llm)
-            task = GSM8K(self.MODEL_NAME)
-            task.evaluate(llm)
-            task = GPQADiamond(self.MODEL_NAME)
-            task.evaluate(llm,
-                          extra_evaluator_kwargs=dict(apply_chat_template=True))
 
 
 class TestLlama3_1NemotronNano8Bv1(LlmapiAccuracyTestHarness):
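For context, a minimal hypothetical sketch of the idea behind the change, not code from this commit: capping max_seq_len and max_batch_size when constructing the TensorRT-LLM LLM bounds the KV-cache and batching workspace the engine reserves, which is what the commit relies on to avoid OOMs with the 49B model at tensor_parallel_size=2. Only the constructor arguments mirror the test above; the build_constrained_llm helper, the model directory, and the standalone generate() call are illustrative assumptions.

# Hypothetical sketch, assuming the tensorrt_llm LLM API as used in the test
# above; paths and prompts are placeholders, not values from the commit.
from tensorrt_llm import LLM, SamplingParams


def build_constrained_llm(model_dir: str) -> LLM:
    # max_seq_len caps the per-request context the engine plans for and
    # max_batch_size caps how many requests are batched together; both
    # shrink the KV-cache and workspace reserved at startup.
    return LLM(model_dir,
               tensor_parallel_size=2,
               max_seq_len=8192,
               max_batch_size=64)


if __name__ == "__main__":
    # The LLM object is a context manager, as in the test above.
    with build_constrained_llm(
            "/models/nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1") as llm:
        outputs = llm.generate(["The capital of France is"],
                               SamplingParams(max_tokens=16))
        print(outputs[0].outputs[0].text)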