[https://nvbugs/5698434][test] add qwen3-4b accuracy test case (#10382)

Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com>
This commit is contained in:
Ivy Zhang 2026-01-07 10:56:34 +08:00 committed by GitHub
parent 6095c80e56
commit 4a1b2e23b3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 34 additions and 0 deletions

View File

@@ -106,6 +106,9 @@ deepseek-ai/DeepSeek-V3.2-Exp:
- quant_algo: NVFP4
spec_dec_algo: MTP
accuracy: 95.6
Qwen3/Qwen3-4B:
- spec_dec_algo: Eagle
accuracy: 85.823
Qwen3/Qwen3-8B:
- accuracy: 87.1114
- spec_dec_algo: Eagle

View File

@@ -3327,6 +3327,35 @@ class TestQwen2_7BInstruct(LlmapiAccuracyTestHarness):
extra_evaluator_kwargs=self.EXTRA_EVALUATOR_KWARGS)
class TestQwen3_4B(LlmapiAccuracyTestHarness):
    """Accuracy regression coverage for Qwen3-4B with EAGLE3 speculation."""
    MODEL_NAME = "Qwen3/Qwen3-4B"

    def test_eagle3(self):
        "RCCA: https://nvbugspro.nvidia.com/bug/5698434"
        # Target checkpoint and its EAGLE3 draft model live under the
        # shared models root, side by side.
        target_model_dir = f"{llm_models_root()}/Qwen3/Qwen3-4B"
        eagle_model_dir = f"{llm_models_root()}/Qwen3/Qwen3-4B_eagle3/"

        # Runtime knobs for this scenario: overlap scheduling off,
        # CUDA graphs at their default configuration.
        runtime_kwargs = {
            "disable_overlap_scheduler": True,
            "cuda_graph_config": CudaGraphConfig(),
        }
        # Block reuse is disabled so every request re-populates KV cache.
        kv_cache_config = KvCacheConfig(enable_block_reuse=False,
                                        free_gpu_memory_fraction=0.6)
        spec_config = EagleDecodingConfig(max_draft_len=3,
                                          speculative_model_dir=eagle_model_dir)

        with LLM(model=target_model_dir,
                 kv_cache_config=kv_cache_config,
                 speculative_config=spec_config,
                 **runtime_kwargs) as llm:
            task = GSM8K(self.MODEL_NAME)
            task.evaluate(llm)
class TestQwen3_8B(LlmapiAccuracyTestHarness):
MODEL_NAME = "Qwen3/Qwen3-8B"

View File

@@ -525,6 +525,7 @@ accuracy/test_llm_api_pytorch.py::TestGLM4_6::test_nvfp4_multi_gpus[throughput]
accuracy/test_llm_api_pytorch.py::TestGLM4_6::test_nvfp4_multi_gpus[throughput_trtllm]
accuracy/test_llm_api_pytorch.py::TestGLM4_6::test_nvfp4_2_model_mtp[2model]
accuracy/test_llm_api_pytorch.py::TestGLM4_6::test_nvfp4_2_model_mtp[2model_trtllm]
accuracy/test_llm_api_pytorch.py::TestQwen3_4B::test_eagle3
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_fp8_block_scales[latency]
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_bf16[multi_gpus_no_cache]
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=False]

View File

@@ -229,6 +229,7 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[mxfp8-laten
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_fp8_block_scales[latency]
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_w4a8_mxfp4[fp8-latency]
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_w4a8_mxfp4[mxfp8-latency]
accuracy/test_llm_api_pytorch.py::TestQwen3_4B::test_eagle3
accuracy/test_llm_api_pytorch.py::TestSeedOss_36B::test_auto_dtype
accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_VL_7B::test_auto_dtype