Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-13 22:18:36 +08:00)
Commit a1385243e1 (parent 9f044b9dd9)
Signed-off-by: Suyog Gupta <41447211+suyoggupta@users.noreply.github.com>
@@ -207,7 +207,7 @@ class TestNemotronMOE(LlmapiAccuracyTestHarness):
         kwargs = self.get_default_kwargs()
         # TODO: multi-stream MOE seems to increase the memory usage
         kwargs["max_batch_size"] = 32
-        kwargs["free_mem_ratio"] = 0.5
+        kwargs["free_mem_ratio"] = 0.4
         sampling_params = self.get_default_sampling_params()
         with AutoDeployLLM(model=self.MODEL_PATH_BF16,
                            tokenizer=self.MODEL_PATH_BF16,
@@ -226,9 +226,9 @@ class TestNemotronMOE(LlmapiAccuracyTestHarness):
             # Manually set quant_config for FP8 model to get the accuracy threshold
             llm.args.quant_config.quant_algo = QuantAlgo.FP8
             llm.args.quant_config.kv_cache_quant_algo = QuantAlgo.FP8

-            # task = MMLU(self.MODEL_NAME)
-            # task.evaluate(llm, sampling_params=sampling_params)
+            sampling_params = self.get_default_sampling_params()
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm, sampling_params=sampling_params)
             task = GSM8K(self.MODEL_NAME)
             task.evaluate(llm)
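Read together, the two hunks lower free_mem_ratio from 0.5 to 0.4 and enable the previously commented-out MMLU evaluation alongside GSM8K. A minimal sketch of the resulting test flow follows; the statements themselves come from the diff above, while the method name and the way kwargs are passed into AutoDeployLLM are assumptions made only for illustration, not taken from this commit:

# Sketch of the post-commit test body; names come from the diff, scaffolding is assumed.
def test_nemotron_moe_accuracy(self):  # hypothetical method name
    kwargs = self.get_default_kwargs()
    # TODO: multi-stream MOE seems to increase the memory usage
    kwargs["max_batch_size"] = 32
    kwargs["free_mem_ratio"] = 0.4  # lowered from 0.5 by this commit
    sampling_params = self.get_default_sampling_params()
    with AutoDeployLLM(model=self.MODEL_PATH_BF16,
                       tokenizer=self.MODEL_PATH_BF16,
                       **kwargs) as llm:  # kwarg forwarding assumed
        # Manually set quant_config for FP8 model to get the accuracy threshold
        llm.args.quant_config.quant_algo = QuantAlgo.FP8
        llm.args.quant_config.kv_cache_quant_algo = QuantAlgo.FP8

        # MMLU is now evaluated (it was commented out before this commit)
        sampling_params = self.get_default_sampling_params()
        task = MMLU(self.MODEL_NAME)
        task.evaluate(llm, sampling_params=sampling_params)

        task = GSM8K(self.MODEL_NAME)
        task.evaluate(llm)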