[#10580][fix] re-enable NemotronH MOE MMLU test (#10594)

Signed-off-by: Suyog Gupta <41447211+suyoggupta@users.noreply.github.com>
Suyog Gupta, 2026-01-12 09:26:07 -08:00, committed by GitHub
parent 9f044b9dd9
commit a1385243e1

@@ -207,7 +207,7 @@ class TestNemotronMOE(LlmapiAccuracyTestHarness):
         kwargs = self.get_default_kwargs()
         # TODO: multi-stream MOE seems to increase the memory usage
         kwargs["max_batch_size"] = 32
-        kwargs["free_mem_ratio"] = 0.5
+        kwargs["free_mem_ratio"] = 0.4
         sampling_params = self.get_default_sampling_params()
         with AutoDeployLLM(model=self.MODEL_PATH_BF16,
                            tokenizer=self.MODEL_PATH_BF16,
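
The only functional change in this first hunk is lowering free_mem_ratio from 0.5 to 0.4, leaving more headroom for the multi-stream MOE memory growth flagged in the TODO. As a rough illustration of what a knob like this typically controls, here is a minimal sketch of fraction-of-free-memory KV-cache budgeting; the helper name and the use of torch.cuda.mem_get_info are illustrative assumptions, not the actual TensorRT-LLM code path:

    import torch

    def kv_cache_budget_bytes(free_mem_ratio: float) -> int:
        # Hypothetical helper, not TensorRT-LLM internals: reserve only a
        # fraction of the currently free device memory for the KV cache so
        # that transient allocations (e.g. multi-stream MOE buffers) still fit.
        free_bytes, _total_bytes = torch.cuda.mem_get_info()
        return int(free_bytes * free_mem_ratio)

    # With free_mem_ratio = 0.4 instead of 0.5, a device with 40 GiB free
    # caps the KV cache near 16 GiB rather than 20 GiB.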
@@ -226,9 +226,9 @@ class TestNemotronMOE(LlmapiAccuracyTestHarness):
             # Manually set quant_config for FP8 model to get the accuracy threshold
             llm.args.quant_config.quant_algo = QuantAlgo.FP8
             llm.args.quant_config.kv_cache_quant_algo = QuantAlgo.FP8
-            # task = MMLU(self.MODEL_NAME)
-            # task.evaluate(llm, sampling_params=sampling_params)
+            sampling_params = self.get_default_sampling_params()
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm, sampling_params=sampling_params)
             task = GSM8K(self.MODEL_NAME)
             task.evaluate(llm)
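
Read as straight Python, the second hunk restores the MMLU evaluation that had been commented out. The post-patch body of that block looks like this, reconstructed from the hunk alone; the enclosing test method and the construction of llm sit outside the diff and are assumptions here:

    # Post-patch state of the second hunk; `llm` is assumed to come from an
    # enclosing AutoDeployLLM context manager that the diff does not show.
    # Manually set quant_config for FP8 model to get the accuracy threshold
    llm.args.quant_config.quant_algo = QuantAlgo.FP8
    llm.args.quant_config.kv_cache_quant_algo = QuantAlgo.FP8
    sampling_params = self.get_default_sampling_params()
    task = MMLU(self.MODEL_NAME)
    task.evaluate(llm, sampling_params=sampling_params)  # re-enabled by this commit
    task = GSM8K(self.MODEL_NAME)
    task.evaluate(llm)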