diff --git a/tests/integration/defs/accuracy/test_disaggregated_serving.py b/tests/integration/defs/accuracy/test_disaggregated_serving.py index ba78064b52..c3d0fb05de 100644 --- a/tests/integration/defs/accuracy/test_disaggregated_serving.py +++ b/tests/integration/defs/accuracy/test_disaggregated_serving.py @@ -207,9 +207,16 @@ def launch_disaggregated_llm( env["TRTLLM_USE_UCX_KVCACHE"] = "1" if enable_perf: env["TRTLLM_KVCACHE_TIME_OUTPUT_PATH"] = kv_cache_perf_dir + + cache_transceiver_config_backend = ctx_server_config.get( + "cache_transceiver_config", {}).get("backend", "DEFAULT") + if cache_transceiver_config_backend == "NIXL": + env["UCX_MM_ERROR_HANDLING"] = "y" gpu_range = range(current_gpu_offset, current_gpu_offset + ctx_total_gpus) env["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_range)) + if not has_nvlink(): + env["UCX_TLS"] = "^cuda_ipc" current_gpu_offset += ctx_total_gpus ctx_server_args = ctx_args + [ @@ -230,6 +237,10 @@ def launch_disaggregated_llm( env["TRTLLM_USE_UCX_KVCACHE"] = "1" if enable_perf: env["TRTLLM_KVCACHE_TIME_OUTPUT_PATH"] = kv_cache_perf_dir + cache_transceiver_config_backend = gen_server_config.get( + "cache_transceiver_config", {}).get("backend", "DEFAULT") + if cache_transceiver_config_backend == "NIXL": + env["UCX_MM_ERROR_HANDLING"] = "y" gpu_range = range(current_gpu_offset, current_gpu_offset + gen_total_gpus) env["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_range)) diff --git a/tests/integration/defs/disaggregated/test_disaggregated.py b/tests/integration/defs/disaggregated/test_disaggregated.py index 86ba1c4517..023681c0ff 100644 --- a/tests/integration/defs/disaggregated/test_disaggregated.py +++ b/tests/integration/defs/disaggregated/test_disaggregated.py @@ -365,8 +365,13 @@ def run_disaggregated_test(example_dir, """Run disaggregated test with given configuration.""" cleanup_output_files() run_env = env.copy() - run_env["UCX_TLS"] = "^ib" + # On some CI nodes, we set UCX_TLS to "^ib" to avoid the issue that 
IB is equipped but not available. + # we set UCX_MM_ERROR_HANDLING to "y" to avoid the issue that NIXL cannot use IB or TCP for notify on some CI nodes, + # setting it to "y" will enable NIXL to use system memory for notify. + + run_env["UCX_TLS"] = "^ib" + run_env["UCX_MM_ERROR_HANDLING"] = "y" num_ranks, config_file = get_test_config(test_desc, example_dir, os.path.dirname(__file__)) @@ -1190,6 +1195,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_nixl(disaggregated_test_root, env = llm_venv._new_env.copy() env["TRTLLM_USE_NIXL_KVCACHE"] = "1" env["UCX_TLS"] = "^ib" + env["UCX_MM_ERROR_HANDLING"] = "y" run_disaggregated_test(disaggregated_example_root, "deepseek_v3_lite_fp8_nixl", env=env, @@ -1497,6 +1503,7 @@ def run_disaggregated_benchmark(example_dir, """Run disaggregated test with given configuration.""" run_env = env.copy() run_env["UCX_TLS"] = "^ib" + run_env["UCX_MM_ERROR_HANDLING"] = "y" workers_cmd = [ 'mpirun', '--allow-run-as-root', '--oversubscribe', '-n', str(num_ranks), 'trtllm-serve', 'disaggregated_mpi_worker', '-c', @@ -1677,6 +1684,7 @@ def run_disaggregated_aiperf(config_file, cleanup_output_files() run_env = env.copy() run_env["UCX_TLS"] = "^ib" + run_env["UCX_MM_ERROR_HANDLING"] = "y" workers_cmd = [ 'mpirun', '--allow-run-as-root', '--oversubscribe', '-n', diff --git a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py index 4e146f3df0..1d10ead5f8 100644 --- a/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py +++ b/tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py @@ -184,7 +184,11 @@ def verify_disaggregated(model, generation_overlap, enable_cuda_graph, prompt, port_name = mpi_publish_name() - with MPIPoolExecutor(max_workers=2, env={"UCX_TLS": "^ib"}) as executor: + with MPIPoolExecutor(max_workers=2, + env={ + "UCX_TLS": "^ib", + "UCX_MM_ERROR_HANDLING": "y" + }) as executor: futures = [] try: for worker_arg in 
worker_args: @@ -330,7 +334,11 @@ def test_disaggregated_llama_context_capacity(model, enable_cuda_graph, prompt = "European Union is a political and economic union of 27 countries. The European Union is headquartered in Brussels, Belgium. The first president of the European Union was Jean-Claude Juncker. The current president is Ursula von der Leyen. The European Union is a major economic and political entity." - with MPIPoolExecutor(max_workers=2, env={"UCX_TLS": "^ib"}) as executor: + with MPIPoolExecutor(max_workers=2, + env={ + "UCX_TLS": "^ib", + "UCX_MM_ERROR_HANDLING": "y" + }) as executor: futures = [] try: for worker_arg in worker_args: @@ -440,6 +448,7 @@ def test_disaggregated_spec_dec_batch_slot_limit(model, spec_dec_model_path, with MPIPoolExecutor(max_workers=2, env={ "UCX_TLS": "^ib", + "UCX_MM_ERROR_HANDLING": "y", "OMPI_MCA_rmaps_base_oversubscribe": "1" }, mpi_info=mpi_info) as executor: diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index c9f8c9e1f3..182b9075fa 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -312,9 +312,6 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_fp8[throughput_laten triton_server/test_triton.py::test_llava_onevision[llava_onevision] SKIP (https://nvbugs/5775205) triton_server/test_triton.py::test_gpt_ib_lad[gpt-ib-lad] SKIP (https://nvbugs/5775223) unittest/_torch/modules/test_fused_moe.py::test_fused_moe_fp8_blockwise_cute_dsl_multi_gpu[MoEWeightLoadingMode.FUSED_GATE_UP_PROJ-DefaultMoeRoutingMethod-1] SKIP (https://nvbugs/5775256) -disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-3.1-8b-instruct-hf-fp8] SKIP (https://nvbugs/5769890) -disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-bf16] SKIP (https://nvbugs/5769890) -disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] 
SKIP (https://nvbugs/5769890,https://nvbugs/5748683) accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_pp4_mtp] SKIP (https://nvbugs/5779536) unittest/_torch/ray_orchestrator/multi_gpu/test_ops.py::test_reducescatter_pg_op[var_len:True-seqlen:16-hidden:128] SKIP (https://nvbugs/5781383) cpp/test_e2e.py::test_model[-mamba-86] SKIP (https://nvbugs/5781665)