Add a disaggregated integration test for DeepSeek-V3-Lite FP8 using MTP
speculative decoding (num_nextn_predict_layers: 2) with one context server and
one generation server at tensor_parallel_size 1, and list it in l0_h100.

diff --git a/tests/integration/defs/disaggregated/test_configs/disagg_config_ctxtp1_gentp1_deepseek_v3_lite_two_mtp.yaml b/tests/integration/defs/disaggregated/test_configs/disagg_config_ctxtp1_gentp1_deepseek_v3_lite_two_mtp.yaml
new file mode 100644
index 0000000000..5c41612f1d
--- /dev/null
+++ b/tests/integration/defs/disaggregated/test_configs/disagg_config_ctxtp1_gentp1_deepseek_v3_lite_two_mtp.yaml
@@ -0,0 +1,24 @@
+hostname: localhost
+port: 8000
+model: DeepSeek-V3-Lite/fp8
+free_gpu_memory_fraction: 0.1
+backend: "pytorch"
+use_cuda_graph: False
+disable_overlap_scheduler: True
+speculative_config:
+  decoding_type: MTP
+  num_nextn_predict_layers: 2
+context_servers:
+  num_instances: 1
+  tensor_parallel_size: 1
+  pipeline_parallel_size: 1
+  enable_attention_dp: true
+  urls:
+    - "localhost:8001"
+generation_servers:
+  num_instances: 1
+  tensor_parallel_size: 1
+  pipeline_parallel_size: 1
+  enable_attention_dp: false
+  urls:
+    - "localhost:8002"
diff --git a/tests/integration/defs/disaggregated/test_disaggregated.py b/tests/integration/defs/disaggregated/test_disaggregated.py
index a2438a8e15..8631fe45d6 100644
--- a/tests/integration/defs/disaggregated/test_disaggregated.py
+++ b/tests/integration/defs/disaggregated/test_disaggregated.py
@@ -106,6 +106,10 @@ def get_test_config(test_desc, example_dir, test_root):
         ),
         "deepseek_v3_lite_bf16_conditional":
         (2, f"{test_configs_root}/disagg_config_conditional_deepseek_v3.yaml"),
+        "deepseek_v3_lite_fp8_tp1_two_mtp":
+        (2,
+         f"{test_configs_root}/disagg_config_ctxtp1_gentp1_deepseek_v3_lite_two_mtp.yaml"
+         ),
     }
 
     if test_desc not in config_map:
@@ -808,3 +812,25 @@ def test_disaggregated_deepseek_v3_lite_bf16_conditional(
                            "deepseek_v3_lite_bf16_conditional",
                            env=llm_venv._new_env,
                            cwd=llm_venv.get_working_directory())
+
+
+@skip_no_hopper
+@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
+                         indirect=True)
+def test_disaggregated_deepseek_v3_lite_fp8_tp1_two_mtp(
+        disaggregated_test_root, disaggregated_example_root, llm_venv,
+        deepseek_v3_model_root):
+    src_dst_dict = {
+        deepseek_v3_model_root:
+        f"{llm_venv.get_working_directory()}/DeepSeek-V3-Lite/fp8",
+    }
+
+    for src, dst in src_dst_dict.items():
+        if not os.path.islink(dst):
+            os.makedirs(os.path.dirname(dst), exist_ok=True)
+            os.symlink(src, dst, target_is_directory=True)
+
+    run_disaggregated_test(disaggregated_example_root,
+                           "deepseek_v3_lite_fp8_tp1_two_mtp",
+                           env=llm_venv._new_env,
+                           cwd=llm_venv.get_working_directory())
diff --git a/tests/integration/test_lists/test-db/l0_h100.yml b/tests/integration/test_lists/test-db/l0_h100.yml
index 954a8ff3da..087032a06a 100644
--- a/tests/integration/test_lists/test-db/l0_h100.yml
+++ b/tests/integration/test_lists/test-db/l0_h100.yml
@@ -62,6 +62,7 @@ l0_h100:
   - test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-instruct-hf-fp8-True-True]
   - disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu[DeepSeek-V3-Lite-fp8]
   - disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp[DeepSeek-V3-Lite-fp8]
+  - disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_tp1_two_mtp[DeepSeek-V3-Lite-fp8]
   - disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu[DeepSeek-V3-Lite-fp8]
   - disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[False-False-TinyLlama-1.1B-Chat-v1.0]
   - disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[False-True-TinyLlama-1.1B-Chat-v1.0]