mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
Add two MTP disaggregated test (#4546)
Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
This commit is contained in:
parent
dec326ba7d
commit
01bd4c00b4
@ -0,0 +1,24 @@
|
||||
# Server configuration with one context server and one generation server,
# each using tensor_parallel_size 1 and pipeline_parallel_size 1, and MTP
# speculative decoding with two next-n predict layers.
# NOTE(review): nesting below was reconstructed from key grouping after the
# original indentation was lost -- confirm against the upstream file.
hostname: localhost
port: 8000
# Relative model path; presumably resolved against the process working
# directory -- confirm against the launcher.
model: DeepSeek-V3-Lite/fp8
free_gpu_memory_fraction: 0.1
backend: "pytorch"
use_cuda_graph: False
disable_overlap_scheduler: True
speculative_config:
  decoding_type: MTP
  num_nextn_predict_layers: 2
context_servers:
  num_instances: 1
  tensor_parallel_size: 1
  pipeline_parallel_size: 1
  # Attention DP is enabled on the context side only; the generation side
  # below sets it to false.
  enable_attention_dp: true
  urls:
    - "localhost:8001"
generation_servers:
  num_instances: 1
  tensor_parallel_size: 1
  pipeline_parallel_size: 1
  enable_attention_dp: false
  urls:
    - "localhost:8002"
|
||||
@ -106,6 +106,10 @@ def get_test_config(test_desc, example_dir, test_root):
|
||||
),
|
||||
"deepseek_v3_lite_bf16_conditional":
|
||||
(2, f"{test_configs_root}/disagg_config_conditional_deepseek_v3.yaml"),
|
||||
"deepseek_v3_lite_fp8_tp1_two_mtp":
|
||||
(2,
|
||||
f"{test_configs_root}/disagg_config_ctxtp1_gentp1_deepseek_v3_lite_two_mtp.yaml"
|
||||
),
|
||||
}
|
||||
|
||||
if test_desc not in config_map:
|
||||
@ -808,3 +812,25 @@ def test_disaggregated_deepseek_v3_lite_bf16_conditional(
|
||||
"deepseek_v3_lite_bf16_conditional",
|
||||
env=llm_venv._new_env,
|
||||
cwd=llm_venv.get_working_directory())
|
||||
|
||||
|
||||
@skip_no_hopper
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
                         indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_tp1_two_mtp(
        disaggregated_test_root, disaggregated_example_root, llm_venv,
        deepseek_v3_model_root):
    """Run the "deepseek_v3_lite_fp8_tp1_two_mtp" disaggregated test.

    The model root supplied by the ``deepseek_v3_model_root`` fixture is
    symlinked to ``<working directory>/DeepSeek-V3-Lite/fp8`` unless a
    symlink already exists at that path. ``run_disaggregated_test`` is then
    invoked with the venv's environment and the working directory as ``cwd``.

    ``disaggregated_test_root`` is not referenced in the body; presumably it
    is requested only for its fixture side effects -- TODO confirm.
    """
    # Link location inside the working directory; presumably this lets a
    # relative model path resolve from cwd -- verify against the test config.
    model_link = f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8"

    # Only an existing symlink suppresses creation; any other kind of entry
    # at this path is not checked for here.
    if not os.path.islink(model_link):
        os.makedirs(os.path.dirname(model_link), exist_ok=True)
        os.symlink(deepseek_v3_model_root,
                   model_link,
                   target_is_directory=True)

    run_disaggregated_test(disaggregated_example_root,
                           "deepseek_v3_lite_fp8_tp1_two_mtp",
                           env=llm_venv._new_env,
                           cwd=llm_venv.get_working_directory())
|
||||
|
||||
@ -62,6 +62,7 @@ l0_h100:
|
||||
- test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-instruct-hf-fp8-True-True]
|
||||
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu[DeepSeek-V3-Lite-fp8]
|
||||
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp[DeepSeek-V3-Lite-fp8]
|
||||
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_tp1_two_mtp[DeepSeek-V3-Lite-fp8]
|
||||
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu[DeepSeek-V3-Lite-fp8]
|
||||
- disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[False-False-TinyLlama-1.1B-Chat-v1.0]
|
||||
- disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[False-True-TinyLlama-1.1B-Chat-v1.0]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user