mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-05 10:42:38 +08:00
[None][test] Update case for release (#10811)
Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com> Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
This commit is contained in:
parent
18f63dfcec
commit
bcd2dc490c
@@ -5104,7 +5104,7 @@ class TestDeepSeekR1LongBenchV2(LlmapiAccuracyTestHarness):
|
||||
pytest.skip(f"Model directory {model_dir} does not exist")
|
||||
|
||||
# Configure model settings
|
||||
- kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.8,
|
||||
+ kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.4,
|
||||
enable_block_reuse=True,
|
||||
enable_partial_reuse=False,
|
||||
dtype="fp8")
|
||||
|
||||
@@ -119,6 +119,8 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput_mtp_trtllm]
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale_chunked_prefill[latency]
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale_chunked_prefill[throughput]
|
||||
+ accuracy/test_llm_api_pytorch.py::TestDeepSeekR1LongBenchV2::test_fp8_8gpus TIMEOUT (240)
|
||||
+ accuracy/test_llm_api_pytorch.py::TestDeepSeekR1LongBenchV2::test_nvfp4_4gpus TIMEOUT (240)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_fp8_blockscale[baseline]
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_fp8_blockscale[baseline_mtp1]
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_fp8_blockscale[baseline_fp8kv]
|
||||
|
||||
@@ -3,5 +3,3 @@ stress_test/stress_test.py::test_run_stress_test[DeepSeek-V3_tp8-stress_time_360
|
||||
stress_test/stress_test.py::test_run_stress_test[DeepSeek-R1_tp8-stress_time_3600s_timeout_5400s-MAX_UTILIZATION-pytorch-stress-test-with-accuracy]
|
||||
disaggregated/test_disaggregated.py::test_disaggregated_stress_test[input8k-output1k-conc512-deepseek_r1_v2_fp4_stress]
|
||||
disaggregated/test_disaggregated.py::test_disaggregated_stress_test[input8k-output1k-conc512-gpt_oss_120b_stress]
|
||||
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1LongBenchV2::test_fp8_8gpus
|
||||
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1LongBenchV2::test_nvfp4_4gpus
|
||||
|
||||
Loading…
Reference in New Issue
Block a user