Mirror of https://github.com/NVIDIA/TensorRT-LLM.git, synced 2026-01-14 06:27:45 +08:00
[TRTLLM-6541][test] Add NIM Related Cases [StarCoder2_7B] and [Codestral_22B_V01] (#6939)
Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
parent 816a120af6
commit e90280a84d

@@ -45,6 +45,14 @@ microsoft/Phi-3.5-mini-instruct:
   - accuracy: 31.354
 microsoft/Phi-4-mini-instruct:
   - accuracy: 32.921
+bigcode/starcoder2-7b:
+  - accuracy: 26.611
+  - quant_algo: FP8
+    accuracy: 26.611
+mistralai/Codestral-22B-v0.1:
+  - accuracy: 30.316
+  - quant_algo: FP8
+    accuracy: 30.316
 state-spaces/mamba-130m-hf:
   - accuracy: 19.470
 lmsys/vicuna-7b-v1.3:

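Note on the reference-file format used by the three YAML hunks in this commit: each top-level key is a Hugging Face model ID, and each list item records a reference accuracy; an item that carries a quant_algo key applies to runs quantized with that algorithm, while an item with accuracy alone covers the unquantized (auto-dtype) run. A minimal sketch of resolving such an entry with PyYAML follows; the lookup_reference helper is hypothetical and is not the harness's own lookup code.

# Minimal sketch (assumption: not the harness's actual lookup logic).
import yaml

REFERENCES = yaml.safe_load("""
bigcode/starcoder2-7b:
  - accuracy: 26.611
  - quant_algo: FP8
    accuracy: 26.611
""")

def lookup_reference(model, quant_algo=None):
    # Hypothetical helper: pick the entry whose quant_algo matches;
    # entries without a quant_algo key cover the unquantized run.
    for entry in REFERENCES[model]:
        if entry.get("quant_algo") == quant_algo:
            return entry["accuracy"]
    raise KeyError(f"no reference for {model} / {quant_algo}")

assert lookup_reference("bigcode/starcoder2-7b") == 26.611
assert lookup_reference("bigcode/starcoder2-7b", "FP8") == 26.611

Here the FP8 references deliberately match the unquantized ones, i.e. the commit asserts that FP8 quantization costs these two models no measurable accuracy on these tasks.
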
@@ -162,6 +162,8 @@ microsoft/Phi-4-multimodal-instruct-long-rope:
   - accuracy: 75.85
 microsoft/Phi-4-mini-instruct:
   - accuracy: 82.30
+mistralai/Codestral-22B-v0.1:
+  - accuracy: 67.10
 GPT-OSS/BF16:
   - accuracy: 90.3
 GPT-OSS/MXFP4:

@@ -232,6 +232,14 @@ nvidia/Nemotron-H-56B-Base-8K:
     accuracy: 83.82
 microsoft/Phi-4-mini-instruct:
   - accuracy: 68.98
+bigcode/starcoder2-7b:
+  - accuracy: 41.35
+  - quant_algo: FP8
+    accuracy: 41.35
+mistralai/Codestral-22B-v0.1:
+  - accuracy: 61.72
+  - quant_algo: FP8
+    accuracy: 61.72
 # Created a dummy accuracy to track tp_size=2 for phi4-mini model.
 # TODO: update once https://nvbugs/5393849 is fixed.
 microsoft/Phi-4-mini-instruct-tp2:

@@ -433,3 +433,55 @@ class TestEagle2Vicuna_7B_v1_3(LlmapiAccuracyTestHarness):
                  speculative_config=self.speculative_config) as llm:
             task = CnnDailymail(self.MODEL_NAME)
             task.evaluate(llm)
+
+
+class TestStarCoder2_7B(LlmapiAccuracyTestHarness):
+    MODEL_NAME = "bigcode/starcoder2-7b"
+    MODEL_PATH = f"{llm_models_root()}/starcoder2-7b"
+    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)
+
+    @pytest.mark.skip_less_device_memory(70000)
+    def test_auto_dtype(self):
+        with LLM(self.MODEL_PATH, kv_cache_config=self.kv_cache_config) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
+
+    @skip_pre_ada
+    @pytest.mark.skip_less_device_memory(70000)
+    def test_fp8(self):
+        quant_config = QuantConfig(QuantAlgo.FP8)
+        with LLM(self.MODEL_PATH,
+                 quant_config=quant_config,
+                 kv_cache_config=self.kv_cache_config) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
+
+
+class TestCodestral_22B_V01(LlmapiAccuracyTestHarness):
+    MODEL_NAME = "mistralai/Codestral-22B-v0.1"
+    MODEL_PATH = f"{llm_models_root()}/Codestral-22B-v0.1"
+    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)
+
+    @pytest.mark.skip_less_device_memory(80000)
+    def test_auto_dtype(self):
+        with LLM(self.MODEL_PATH, kv_cache_config=self.kv_cache_config) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
+
+    @skip_pre_ada
+    @pytest.mark.skip_less_device_memory(80000)
+    def test_fp8(self):
+        quant_config = QuantConfig(QuantAlgo.FP8)
+        with LLM(self.MODEL_PATH,
+                 quant_config=quant_config,
+                 kv_cache_config=self.kv_cache_config) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)

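Both new test classes exercise on-the-fly FP8 post-training quantization through the LLM API rather than loading a prequantized checkpoint. Below is a standalone sketch of the same pattern outside the harness; the import locations are assumptions based on the public LLM API, and the checkpoint path is a placeholder.

# Sketch of the pattern the new FP8 tests exercise; import locations
# are assumed from the public LLM API, and the path is a placeholder.
from tensorrt_llm.llmapi import LLM, KvCacheConfig, QuantAlgo, QuantConfig

quant_config = QuantConfig(QuantAlgo.FP8)  # on-the-fly FP8 PTQ
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)

with LLM("/path/to/Codestral-22B-v0.1",   # placeholder checkpoint path
         quant_config=quant_config,
         kv_cache_config=kv_cache_config) as llm:
    for out in llm.generate(["def quicksort(arr):"]):
        print(out.outputs[0].text)

The skip_pre_ada guard reflects that FP8 requires hardware support introduced with the Ada/Hopper GPU generations, and the skip_less_device_memory marks (70000 MB for the 7B model, 80000 MB for the 22B model) keep the cases off GPUs without enough memory.
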
@@ -2438,6 +2438,22 @@ class TestPhi4MiniInstruct(LlmapiAccuracyTestHarness):
             task.evaluate(llm)
 
 
+class TestCodestral_22B_V01(LlmapiAccuracyTestHarness):
+    MODEL_NAME = "mistralai/Codestral-22B-v0.1"
+    MODEL_PATH = f"{llm_models_root()}/Codestral-22B-v0.1"
+
+    @pytest.mark.skip_less_device_memory(80000)
+    def test_auto_dtype(self):
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)
+        with LLM(self.MODEL_PATH, kv_cache_config=kv_cache_config) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = GSM8K(self.MODEL_NAME)
+            task.evaluate(llm)
+
+
 class TestKanana_Instruct(LlmapiAccuracyTestHarness):
     MODEL_NAME = "kanana-1.5-2.1b-instruct-2505"
     MODEL_PATH = f"{llm_models_root()}/kanana-1.5-2.1b-instruct-2505"

@@ -21,3 +21,8 @@ accuracy/test_llm_api_pytorch.py::TestNemotronH_56B_Base::test_auto_dtype[tp8-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8-cuda_graph=True]
+accuracy/test_llm_api.py::TestStarCoder2_7B::test_auto_dtype
+accuracy/test_llm_api.py::TestStarCoder2_7B::test_fp8
+accuracy/test_llm_api.py::TestCodestral_22B_V01::test_auto_dtype
+accuracy/test_llm_api.py::TestCodestral_22B_V01::test_fp8
+accuracy/test_llm_api_pytorch.py::TestCodestral_22B_V01::test_auto_dtype

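This last hunk registers the new cases in a QA test list; each line is a pytest node ID relative to the directory the list is resolved against (an assumption here). The cases can also be invoked directly, for example:

# Hedged example of running two of the newly listed cases directly;
# assumes the working directory is the accuracy suite's root.
import pytest

pytest.main([
    "accuracy/test_llm_api.py::TestStarCoder2_7B::test_auto_dtype",
    "accuracy/test_llm_api.py::TestCodestral_22B_V01::test_fp8",
])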