[TRTLLM-6541][test] Add NIM Related Cases [StarCoder2_7B] and [Codestral_22B_V01] (#6939)

Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
fredricz-20070104 authored on 2025-08-19 12:13:04 +08:00; committed by GitHub
parent 816a120af6
commit e90280a84d
6 changed files with 91 additions and 0 deletions

@@ -45,6 +45,14 @@ microsoft/Phi-3.5-mini-instruct:
   - accuracy: 31.354
 microsoft/Phi-4-mini-instruct:
   - accuracy: 32.921
+bigcode/starcoder2-7b:
+  - accuracy: 26.611
+  - quant_algo: FP8
+    accuracy: 26.611
+mistralai/Codestral-22B-v0.1:
+  - accuracy: 30.316
+  - quant_algo: FP8
+    accuracy: 30.316
 state-spaces/mamba-130m-hf:
   - accuracy: 19.470
 lmsys/vicuna-7b-v1.3:
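This and the two reference-file hunks below share one schema: each model name maps to a list of entries, where a bare accuracy value is the default-precision threshold and an entry carrying a quant_algo key holds the threshold for that quantization (here the FP8 thresholds reuse the BF16 numbers). A minimal lookup sketch, assuming PyYAML and this schema; the function and the file name in the example are illustrative, not the harness's actual code:

import yaml  # assumes PyYAML is installed

def get_reference_accuracy(path, model, quant_algo=None):
    # Entries without a quant_algo key yield None from .get(), which
    # matches the default-precision lookup (quant_algo=None).
    with open(path) as f:
        references = yaml.safe_load(f)
    for entry in references[model]:
        if entry.get("quant_algo") == quant_algo:
            return entry["accuracy"]
    raise KeyError(f"no reference for {model} with quant_algo={quant_algo}")

# Illustrative call (file name assumed):
# get_reference_accuracy("cnn_dailymail.yaml", "bigcode/starcoder2-7b", "FP8")
# -> 26.611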

@@ -162,6 +162,8 @@ microsoft/Phi-4-multimodal-instruct-long-rope:
   - accuracy: 75.85
 microsoft/Phi-4-mini-instruct:
   - accuracy: 82.30
+mistralai/Codestral-22B-v0.1:
+  - accuracy: 67.10
 GPT-OSS/BF16:
   - accuracy: 90.3
 GPT-OSS/MXFP4:

@@ -232,6 +232,14 @@ nvidia/Nemotron-H-56B-Base-8K:
     accuracy: 83.82
 microsoft/Phi-4-mini-instruct:
   - accuracy: 68.98
+bigcode/starcoder2-7b:
+  - accuracy: 41.35
+  - quant_algo: FP8
+    accuracy: 41.35
+mistralai/Codestral-22B-v0.1:
+  - accuracy: 61.72
+  - quant_algo: FP8
+    accuracy: 61.72
 # Created a dummy accuracy to track tp_size=2 for phi4-mini model.
 # TODO: update once https://nvbugs/5393849 is fixed.
 microsoft/Phi-4-mini-instruct-tp2:

@@ -433,3 +433,55 @@ class TestEagle2Vicuna_7B_v1_3(LlmapiAccuracyTestHarness):
                  speculative_config=self.speculative_config) as llm:
             task = CnnDailymail(self.MODEL_NAME)
             task.evaluate(llm)
+
+
+class TestStarCoder2_7B(LlmapiAccuracyTestHarness):
+    MODEL_NAME = "bigcode/starcoder2-7b"
+    MODEL_PATH = f"{llm_models_root()}/starcoder2-7b"
+    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)
+
+    @pytest.mark.skip_less_device_memory(70000)
+    def test_auto_dtype(self):
+        with LLM(self.MODEL_PATH, kv_cache_config=self.kv_cache_config) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
+
+    @skip_pre_ada
+    @pytest.mark.skip_less_device_memory(70000)
+    def test_fp8(self):
+        quant_config = QuantConfig(QuantAlgo.FP8)
+        with LLM(self.MODEL_PATH,
+                 quant_config=quant_config,
+                 kv_cache_config=self.kv_cache_config) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
+
+
+class TestCodestral_22B_V01(LlmapiAccuracyTestHarness):
+    MODEL_NAME = "mistralai/Codestral-22B-v0.1"
+    MODEL_PATH = f"{llm_models_root()}/Codestral-22B-v0.1"
+    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)
+
+    @pytest.mark.skip_less_device_memory(80000)
+    def test_auto_dtype(self):
+        with LLM(self.MODEL_PATH, kv_cache_config=self.kv_cache_config) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
+
+    @skip_pre_ada
+    @pytest.mark.skip_less_device_memory(80000)
+    def test_fp8(self):
+        quant_config = QuantConfig(QuantAlgo.FP8)
+        with LLM(self.MODEL_PATH,
+                 quant_config=quant_config,
+                 kv_cache_config=self.kv_cache_config) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
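The new tests lean on a few harness helpers whose behavior matters for review: llm_models_root() resolves the shared weights directory, and skip_pre_ada gates the FP8 cases on hardware support. A sketch of assumed implementations, inferred purely from usage here (the LLM_MODELS_ROOT variable, the default path, and the capability check are assumptions, not this PR's code):

import os
import pytest
import torch

def llm_models_root() -> str:
    # Assumed: CI machines export the model-weights root via an env var.
    return os.environ.get("LLM_MODELS_ROOT", "/models")

def _pre_ada() -> bool:
    # FP8 kernels need compute capability >= 8.9 (Ada); also skip on no GPU.
    if not torch.cuda.is_available():
        return True
    return torch.cuda.get_device_capability() < (8, 9)

skip_pre_ada = pytest.mark.skipif(
    _pre_ada(), reason="FP8 requires Ada (SM89) or newer")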

@@ -2438,6 +2438,22 @@ class TestPhi4MiniInstruct(LlmapiAccuracyTestHarness):
             task.evaluate(llm)
 
 
+class TestCodestral_22B_V01(LlmapiAccuracyTestHarness):
+    MODEL_NAME = "mistralai/Codestral-22B-v0.1"
+    MODEL_PATH = f"{llm_models_root()}/Codestral-22B-v0.1"
+
+    @pytest.mark.skip_less_device_memory(80000)
+    def test_auto_dtype(self):
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)
+        with LLM(self.MODEL_PATH, kv_cache_config=kv_cache_config) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = GSM8K(self.MODEL_NAME)
+            task.evaluate(llm)
+
+
 class TestKanana_Instruct(LlmapiAccuracyTestHarness):
     MODEL_NAME = "kanana-1.5-2.1b-instruct-2505"
     MODEL_PATH = f"{llm_models_root()}/kanana-1.5-2.1b-instruct-2505"

@@ -21,3 +21,8 @@ accuracy/test_llm_api_pytorch.py::TestNemotronH_56B_Base::test_auto_dtype[tp8-cu
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8-cuda_graph=True]
+accuracy/test_llm_api.py::TestStarCoder2_7B::test_auto_dtype
+accuracy/test_llm_api.py::TestStarCoder2_7B::test_fp8
+accuracy/test_llm_api.py::TestCodestral_22B_V01::test_auto_dtype
+accuracy/test_llm_api.py::TestCodestral_22B_V01::test_fp8
+accuracy/test_llm_api_pytorch.py::TestCodestral_22B_V01::test_auto_dtype
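The five newly listed cases can also be run locally by handing the same test IDs to pytest; a sketch assuming the working directory is the one these list paths are relative to:

import pytest

# Run exactly the cases added by this commit (IDs copied from the list above).
pytest.main([
    "accuracy/test_llm_api.py::TestStarCoder2_7B::test_auto_dtype",
    "accuracy/test_llm_api.py::TestStarCoder2_7B::test_fp8",
    "accuracy/test_llm_api.py::TestCodestral_22B_V01::test_auto_dtype",
    "accuracy/test_llm_api.py::TestCodestral_22B_V01::test_fp8",
    "accuracy/test_llm_api_pytorch.py::TestCodestral_22B_V01::test_auto_dtype",
])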