tests: add DeepSeek-R1 TP4 (4-GPU tensor-parallel) accuracy test (#5197)

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>
This commit is contained in:
xinhe-nv 2025-06-19 12:48:33 +08:00 committed by GitHub
parent dedce8ab0e
commit e5400eeae0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 73 additions and 16 deletions

View File

@ -987,25 +987,80 @@ class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness):
task.evaluate(llm)
@pytest.mark.timeout(7200)
@pytest.mark.skip_less_device_memory(80000)
class TestDeepSeekR1(LlmapiAccuracyTestHarness):
MODEL_NAME = "deepseek-ai/DeepSeek-R1"
MODEL_PATH = f"{llm_models_root()}/DeepSeek-R1/DeepSeek-R1"
@pytest.mark.skip_less_mpi_world_size(8)
@skip_pre_blackwell
@pytest.mark.parametrize(
"tp_size,pp_size,ep_size,mtp_nextn,fp8kv,attention_dp,cuda_graph,overlap_scheduler,max_batch_size,moe_backend",
[
# Use a larger batch_size to speed up the tests
(8, 1, 4, 3, False, False, True, True, 32, "CUTLASS"),
(8, 1, 4, 3, False, False, True, True, 32, "TRTLLM"),
(8, 1, 8, 0, True, True, True, True, 32, "CUTLASS"),
(8, 1, 1, 0, True, True, True, True, 32, "CUTLASS"),
pytest.param(8,
1,
4,
3,
False,
False,
True,
True,
32,
"CUTLASS",
marks=pytest.mark.skip_less_device(8)),
pytest.param(8,
1,
4,
3,
False,
False,
True,
True,
32,
"TRTLLM",
marks=pytest.mark.skip_less_device(8)),
pytest.param(8,
1,
8,
0,
True,
True,
True,
True,
32,
"CUTLASS",
marks=pytest.mark.skip_less_device(8)),
pytest.param(8,
1,
1,
0,
True,
True,
True,
True,
32,
"CUTLASS",
marks=pytest.mark.skip_less_device(8)),
pytest.param(4,
1,
1,
0,
True,
True,
True,
True,
16,
"CUTLASS",
marks=pytest.mark.skip_less_device(4)),
],
ids=["latency", "latency_trtllmgen", "throughput", "throughput_tp8"])
def test_nvfp4_8gpus(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
attention_dp, cuda_graph, overlap_scheduler,
max_batch_size, moe_backend):
ids=[
"latency", "latency_trtllmgen", "throughput", "throughput_tp8",
"throughput_tp4"
])
def test_nvfp4_multi_gpus(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
attention_dp, cuda_graph, overlap_scheduler,
max_batch_size, moe_backend):
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9)
pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
@ -1042,9 +1097,10 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
task.evaluate(llm)
task = GSM8K(self.MODEL_NAME)
task.evaluate(llm)
task = GPQADiamond(self.MODEL_NAME)
task.evaluate(llm,
extra_evaluator_kwargs=dict(apply_chat_template=True))
# The GPQADiamond evaluation below is disabled because it takes too long to run.
# task = GPQADiamond(self.MODEL_NAME)
# task.evaluate(llm,
# extra_evaluator_kwargs=dict(apply_chat_template=True))
@pytest.mark.skip_less_mpi_world_size(8)
@skip_pre_hopper

View File

@ -468,10 +468,11 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_1NemotronNano8Bv1::test_fp8_prequan
accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8ep4-cuda_graph=True]
accuracy/test_llm_api_pytorch.py::TestNemotronH::test_reasoning_fp8_prequantized
accuracy/test_llm_api_pytorch.py::TestQwen2_7BInstruct::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[latency]
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[latency_trtllmgen]
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[throughput]
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_8gpus[throughput_tp8]
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency]
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen]
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput]
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8]
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp4]
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_fp8_block_scales[latency]
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency]
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass]