mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
Merge d7b3cd4ec5 into 38296a472b
This commit is contained in:
commit
33c1106030
@ -365,8 +365,13 @@ def run_disaggregated_test(example_dir,
|
||||
"""Run disaggregated test with given configuration."""
|
||||
cleanup_output_files()
|
||||
run_env = env.copy()
|
||||
run_env["UCX_TLS"] = "^ib"
|
||||
|
||||
# On some CI nodes, we set UCX_TLS to "^ib" to work around InfiniBand (IB) hardware that is installed but not available.
|
||||
# We also set UCX_MM_ERROR_HANDLING to "y" because, on some CI nodes, NIXL cannot use IB or TCP for notifications;
|
||||
# setting it to "y" enables NIXL to use system memory for notifications.
|
||||
|
||||
run_env["UCX_TLS"] = "^ib"
|
||||
run_env["UCX_MM_ERROR_HANDLING"] = "y"
|
||||
num_ranks, config_file = get_test_config(test_desc, example_dir,
|
||||
os.path.dirname(__file__))
|
||||
|
||||
@ -1190,6 +1195,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_nixl(disaggregated_test_root,
|
||||
env = llm_venv._new_env.copy()
|
||||
env["TRTLLM_USE_NIXL_KVCACHE"] = "1"
|
||||
env["UCX_TLS"] = "^ib"
|
||||
env["UCX_MM_ERROR_HANDLING"] = "y"
|
||||
run_disaggregated_test(disaggregated_example_root,
|
||||
"deepseek_v3_lite_fp8_nixl",
|
||||
env=env,
|
||||
@ -1497,6 +1503,7 @@ def run_disaggregated_benchmark(example_dir,
|
||||
"""Run disaggregated test with given configuration."""
|
||||
run_env = env.copy()
|
||||
run_env["UCX_TLS"] = "^ib"
|
||||
run_env["UCX_MM_ERROR_HANDLING"] = "y"
|
||||
workers_cmd = [
|
||||
'mpirun', '--allow-run-as-root', '--oversubscribe', '-n',
|
||||
str(num_ranks), 'trtllm-serve', 'disaggregated_mpi_worker', '-c',
|
||||
@ -1677,6 +1684,7 @@ def run_disaggregated_aiperf(config_file,
|
||||
cleanup_output_files()
|
||||
run_env = env.copy()
|
||||
run_env["UCX_TLS"] = "^ib"
|
||||
run_env["UCX_MM_ERROR_HANDLING"] = "y"
|
||||
|
||||
workers_cmd = [
|
||||
'mpirun', '--allow-run-as-root', '--oversubscribe', '-n',
|
||||
|
||||
@ -343,9 +343,6 @@ triton_server/test_triton.py::test_gpt_ib_lad[gpt-ib-lad] SKIP (https://nvbugs/5
|
||||
unittest/_torch/modules/test_fused_moe.py::test_fused_moe_fp8_blockwise_cute_dsl_multi_gpu[MoEWeightLoadingMode.FUSED_GATE_UP_PROJ-DefaultMoeRoutingMethod-1] SKIP (https://nvbugs/5775256)
|
||||
unittest/_torch/auto_deploy/unit/multigpu/transformations/library/test_ep_sharding.py::test_ep_shard[3-2] SKIP (https://nvbugs/5777041)
|
||||
unittest/_torch/auto_deploy/unit/multigpu/transformations/library/test_ep_sharding.py::test_ep_shard[8-2] SKIP (https://nvbugs/5777041)
|
||||
disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-3.1-8b-instruct-hf-fp8] SKIP (https://nvbugs/5769890)
|
||||
disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-bf16] SKIP (https://nvbugs/5769890)
|
||||
disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] SKIP (https://nvbugs/5769890,https://nvbugs/5748683)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_pp4_mtp] SKIP (https://nvbugs/5779536)
|
||||
unittest/_torch/attention/test_flashinfer_star_attn.py::TestStarAttention::test_flashinfer_star_attention[num_layers:2-num_heads:32-num_kv_heads:8-head_dim:64-anchor_size:64-block_size:64-dtype:torch.float16] SKIP (https://nvbugs/5781389)
|
||||
unittest/_torch/ray_orchestrator/multi_gpu/test_ops.py::test_reducescatter_pg_op[var_len:True-seqlen:16-hidden:128] SKIP (https://nvbugs/5781383)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user