diff --git a/tests/integration/defs/disaggregated/test_auto_scaling.py b/tests/integration/defs/disaggregated/test_auto_scaling.py index f270570e1e..52838c8b20 100644 --- a/tests/integration/defs/disaggregated/test_auto_scaling.py +++ b/tests/integration/defs/disaggregated/test_auto_scaling.py @@ -12,9 +12,9 @@ import openai import pytest import requests import yaml +from defs.common import get_free_port_in_ci as get_free_port from defs.conftest import llm_models_root -from tensorrt_llm._utils import get_free_port from tensorrt_llm.logger import logger HEARTBEAT_INTERVAL = 1 @@ -454,7 +454,7 @@ async def test_worker_restart(model_name, disagg_server_config, worker_config, port=disagg_port) print(response) # kill gen1, the request should fail - terminate(gen_worker1, release_port=False) + terminate(gen_worker1, release_port=True) await asyncio.sleep(CHECK_STATUS_INTERVAL) verify_cluster_info(False, 1, 0, port=disagg_port) with pytest.raises(Exception): @@ -480,7 +480,7 @@ async def test_worker_restart(model_name, disagg_server_config, worker_config, assert len(response.choices[0].text) >= 1 # kill ctx1, the request should fail - terminate(ctx_worker1, release_port=False) + terminate(ctx_worker1, release_port=True) await asyncio.sleep(CHECK_STATUS_INTERVAL) verify_cluster_info(False, 0, 1, port=disagg_port) with pytest.raises(Exception): @@ -500,16 +500,16 @@ async def test_worker_restart(model_name, disagg_server_config, worker_config, assert len(response.choices[0].text) >= 1 # start ctx1 and gen1 again, we have 2 ctxs and 2 gens now - await wait_for_port_released(ctx_worker1.port) - await wait_for_port_released(gen_worker1.port) ctx_worker1 = run_ctx_worker(model_name, worker_config, work_dir, - port=ctx_worker1.port) + port=0, + device=0) gen_worker1 = run_gen_worker(model_name, worker_config, work_dir, - port=gen_worker1.port) + port=0, + device=1) await wait_for_worker_ready(ctx_worker1.port) await wait_for_worker_ready(gen_worker1.port) await asyncio.sleep(CHECK_STATUS_INTERVAL) @@ -556,6 +556,7 @@ async def test_disagg_server_restart(model_name, disagg_server_config, terminate(disagg_server) # wait for the port to be released, so we can rebind the new process to the same port await wait_for_port_released(disagg_port) + await asyncio.sleep(CHECK_STATUS_INTERVAL) with pytest.raises(requests.exceptions.RequestException): verify_cluster_info(False, diff --git a/tests/integration/test_lists/test-db/l0_dgx_h100.yml b/tests/integration/test_lists/test-db/l0_dgx_h100.yml index bcda6ba3f8..3e8dc268a8 100644 --- a/tests/integration/test_lists/test-db/l0_dgx_h100.yml +++ b/tests/integration/test_lists/test-db/l0_dgx_h100.yml @@ -116,17 +116,12 @@ l0_dgx_h100: - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_multi_instance[MMLU] - accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_bf16_4gpu[tp4ep4_cudagraph_overlap] - disaggregated/test_auto_scaling.py::test_service_discovery[etcd-round_robin] - - disaggregated/test_auto_scaling.py::test_service_discovery[etcd-load_balancing] - - disaggregated/test_auto_scaling.py::test_worker_restart[etcd-round_robin] - disaggregated/test_auto_scaling.py::test_worker_restart[etcd-load_balancing] - disaggregated/test_auto_scaling.py::test_minimal_instances[etcd-round_robin] - disaggregated/test_auto_scaling.py::test_disagg_server_restart[etcd-round_robin] - disaggregated/test_auto_scaling.py::test_service_discovery[http-round_robin] - - disaggregated/test_auto_scaling.py::test_service_discovery[http-load_balancing] - disaggregated/test_auto_scaling.py::test_service_discovery[http-kv_cache_aware] - - disaggregated/test_auto_scaling.py::test_worker_restart[http-round_robin] - disaggregated/test_auto_scaling.py::test_worker_restart[http-load_balancing] - - disaggregated/test_auto_scaling.py::test_worker_restart[http-kv_cache_aware] - disaggregated/test_auto_scaling.py::test_minimal_instances[http-round_robin] - disaggregated/test_auto_scaling.py::test_disagg_server_restart[http-round_robin] - condition: diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index dae2169ea4..e11acf6363 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -454,10 +454,6 @@ accuracy/test_cli_flow.py::TestPhi3Small128kInstruct::test_auto_dtype SKIP (http accuracy/test_cli_flow.py::TestPhi3_5MiniInstruct::test_auto_dtype SKIP (https://nvbugs/5744293) unittest/_torch/auto_deploy/unit/singlegpu/models/test_llama4_vlm_patch.py::test_build_run_llama4_vlm SKIP (https://nvbugs/5747878) accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True-moe_backend=TRTLLM] SKIP (https://nvbugs/5740377) -disaggregated/test_auto_scaling.py::test_minimal_instances[etcd-round_robin] SKIP (https://nvbugs/5748564) -disaggregated/test_auto_scaling.py::test_service_discovery[etcd-load_balancing] SKIP (https://nvbugs/5757415) -disaggregated/test_auto_scaling.py::test_service_discovery[http-kv_cache_aware] SKIP (https://nvbugs/5758225) -unittest/llmapi/apps/test_disagg_serving_perf_metrics.py SKIP (https://nvbugs/5752516) unittest/_torch/attention/test_trtllm_flashinfer_symbol_collision.py::test_flashinfer_fused_moe_matches_torch_moe SKIP (https://nvbugs/5752521) cpp/test_multi_gpu.py::TestDisagg::test_symmetric_executor[gpt-2proc-mpi_kvcache-90] SKIP (https://nvbugs/5755941) accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=0] SKIP (https://nvbugs/5748600)