Update ds v3 parameters in stress test. (#3676)

This commit is contained in:
dominicshanshan 2025-04-22 18:04:17 +08:00 committed by GitHub
parent 793d0102d6
commit 792b71f412
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 47 additions and 5 deletions

View File

@ -56,7 +56,7 @@ from defs.trt_test_alternative import (Popen, cleanup_process_tree, print_info,
# [sys.executable, "-m", "pip", "install", "-r", requirements_file])
# Define a constant for process termination timeouts
GRACEFUL_TERMINATION_TIMEOUT = 10 # seconds - set longer when stress large model
GRACEFUL_TERMINATION_TIMEOUT = 300 # seconds - increase when stress-testing large models
@dataclass(frozen=True)
@ -384,7 +384,34 @@ def stress_test(config, test_mode, server_config=None):
)
# Define test configurations
performance_config = PerformanceParams() if run_performance else None
performance_config = None
if run_performance:
performance_config = PerformanceParams()
# DeepSeek-V3 specific performance parameters
if "DeepSeek-V3" in config.model_dir:
performance_config = PerformanceParams(
test_timeout=
36000 # 10 hours for DeepSeek-V3; adjust this value if needed
)
# DeepSeek-V3 specific server parameters
if "DeepSeek-V3" in config.model_dir:
test_server_config = ServerConfig(
port=test_server_config.port,
host=test_server_config.host,
pp_size=test_server_config.pp_size,
ep_size=8, # DeepSeek-V3 specific ep_size
max_batch_size=161, # DeepSeek-V3 specific max_batch_size
max_num_tokens=1160, # DeepSeek-V3 specific max_num_tokens
kv_cache_free_gpu_memory_fraction=
0.7, # DeepSeek-V3 specific kv_cache fraction
capacity_scheduler_policy=test_server_config.
capacity_scheduler_policy,
wait_interval=test_server_config.wait_interval,
max_wait_seconds=7200, # DeepSeek-V3 specific wait time (2 hours)
health_check_timeout=test_server_config.health_check_timeout)
stress_config = StressTestConfig(
model_config=config,
server_config=test_server_config) if run_stress else None
@ -405,7 +432,7 @@ def stress_test(config, test_mode, server_config=None):
if not os.path.exists(model_path):
raise RuntimeError(f"Model path does not exist: {model_path}")
# Create a temporary YAML file for 'capacity_scheduler_policy'
# Create a temporary YAML file for extra_llm_options
extra_llm_options = {
"scheduler_config": {
"capacity_scheduler_policy":
@ -413,6 +440,21 @@ def stress_test(config, test_mode, server_config=None):
}
}
# Add DeepSeek-V3 specific configuration
if "DeepSeek-V3" in config.model_dir:
extra_llm_options["enable_attention_dp"] = True
if config.backend == "pytorch":
extra_llm_options["pytorch_backend_config"] = {
"use_cuda_graph": True,
"cuda_graph_padding_enabled": True,
"cuda_graph_batch_sizes":
[1, 2, 4, 8, 16, 32, 64, 128, 256, 384],
"print_iter_log": True,
"enable_overlap_scheduler": True
}
with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml',
delete=False) as temp_file:
yaml.dump(extra_llm_options, temp_file)

View File

@ -19,7 +19,7 @@ l0_a10:
- disaggregated/test_disaggregated.py::test_disaggregated_mixed[TinyLlama-1.1B-Chat-v1.0]
- disaggregated/test_disaggregated.py::test_disaggregated_overlap[TinyLlama-1.1B-Chat-v1.0]
- stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-MAX_UTILIZATION-pytorch-stress-test]
- stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-GUARANTEED_NO_EVICT-pytorch-stress-stage-alone]
- stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-GUARANTEED_NO_EVICT-pytorch-stress-test]
- condition:
ranges:
system_gpu_count:
@ -111,7 +111,7 @@ l0_a10:
- examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-enable_gemm_plugin]
- examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-enable_gemm_plugin] # 3 mins
- stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-MAX_UTILIZATION-trt-stress-test]
- stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-GUARANTEED_NO_EVICT-trt-stress-stage-alone]
- stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-GUARANTEED_NO_EVICT-trt-stress-test]
- condition:
ranges:
system_gpu_count: