Add a two-MTP disaggregated test (#4546)

Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
Iman Tabrizian 2025-06-13 00:17:45 -04:00 committed by GitHub
parent dec326ba7d
commit 01bd4c00b4
3 changed files with 51 additions and 0 deletions

disagg_config_ctxtp1_gentp1_deepseek_v3_lite_two_mtp.yaml (new file)

@@ -0,0 +1,24 @@
hostname: localhost
port: 8000
model: DeepSeek-V3-Lite/fp8
free_gpu_memory_fraction: 0.1
backend: "pytorch"
use_cuda_graph: False
disable_overlap_scheduler: True
speculative_config:
  decoding_type: MTP
  num_nextn_predict_layers: 2
context_servers:
  num_instances: 1
  tensor_parallel_size: 1
  pipeline_parallel_size: 1
  enable_attention_dp: true
  urls:
    - "localhost:8001"
generation_servers:
  num_instances: 1
  tensor_parallel_size: 1
  pipeline_parallel_size: 1
  enable_attention_dp: false
  urls:
    - "localhost:8002"

disaggregated/test_disaggregated.py

@@ -106,6 +106,10 @@ def get_test_config(test_desc, example_dir, test_root):
        ),
        "deepseek_v3_lite_bf16_conditional":
        (2, f"{test_configs_root}/disagg_config_conditional_deepseek_v3.yaml"),
        "deepseek_v3_lite_fp8_tp1_two_mtp":
        (2,
         f"{test_configs_root}/disagg_config_ctxtp1_gentp1_deepseek_v3_lite_two_mtp.yaml"
         ),
    }
    if test_desc not in config_map:
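
Each config_map entry pairs a count with the YAML file that drives the run; reading the count as the number of worker ranks to launch (one context plus one generation server) is an assumption based on the other two-server entries. The new key then resolves as:

num_ranks, config_file = config_map["deepseek_v3_lite_fp8_tp1_two_mtp"]
# num_ranks == 2
# config_file -> ".../test_configs/disagg_config_ctxtp1_gentp1_deepseek_v3_lite_two_mtp.yaml"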
@@ -808,3 +812,25 @@
        "deepseek_v3_lite_bf16_conditional",
        env=llm_venv._new_env,
        cwd=llm_venv.get_working_directory())


@skip_no_hopper
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
                         indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_tp1_two_mtp(
        disaggregated_test_root, disaggregated_example_root, llm_venv,
        deepseek_v3_model_root):
    src_dst_dict = {
        deepseek_v3_model_root:
        f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8",
    }
    for src, dst in src_dst_dict.items():
        if not os.path.islink(dst):
            os.makedirs(os.path.dirname(dst), exist_ok=True)
            os.symlink(src, dst, target_is_directory=True)

    run_disaggregated_test(disaggregated_example_root,
                           "deepseek_v3_lite_fp8_tp1_two_mtp",
                           env=llm_venv._new_env,
                           cwd=llm_venv.get_working_directory())
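
The symlink loop exists because the config above refers to the model by the relative path DeepSeek-V3-Lite/fp8; linking the downloaded checkpoint into the working directory lets that relative path resolve when the servers are launched from there. The same pattern in isolation, as a minimal sketch with hypothetical paths:

import os
import tempfile

workdir = tempfile.mkdtemp()                 # stand-in for llm_venv.get_working_directory()
model_root = "/models/DeepSeek-V3-Lite-fp8"  # stand-in for the downloaded checkpoint

dst = os.path.join(workdir, "DeepSeek-V3-Lite", "fp8")
if not os.path.islink(dst):
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    os.symlink(model_root, dst, target_is_directory=True)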

l0_h100 test list

@@ -62,6 +62,7 @@ l0_h100:
- test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-instruct-hf-fp8-True-True]
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu[DeepSeek-V3-Lite-fp8]
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp[DeepSeek-V3-Lite-fp8]
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_tp1_two_mtp[DeepSeek-V3-Lite-fp8]
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu[DeepSeek-V3-Lite-fp8]
- disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[False-False-TinyLlama-1.1B-Chat-v1.0]
- disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[False-True-TinyLlama-1.1B-Chat-v1.0]