mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[https://nvbugs/5752516][chore] unwaive test; fix port conflicts in CI (#10152)
Signed-off-by: Lizhi Zhou <1432185+reasonsolo@users.noreply.github.com>
This commit is contained in:
parent
cd5cd60ee4
commit
fe12faef81
@ -12,9 +12,9 @@ import openai
|
|||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
import yaml
|
import yaml
|
||||||
|
from defs.common import get_free_port_in_ci as get_free_port
|
||||||
from defs.conftest import llm_models_root
|
from defs.conftest import llm_models_root
|
||||||
|
|
||||||
from tensorrt_llm._utils import get_free_port
|
|
||||||
from tensorrt_llm.logger import logger
|
from tensorrt_llm.logger import logger
|
||||||
|
|
||||||
HEARTBEAT_INTERVAL = 1
|
HEARTBEAT_INTERVAL = 1
|
||||||
@ -454,7 +454,7 @@ async def test_worker_restart(model_name, disagg_server_config, worker_config,
|
|||||||
port=disagg_port)
|
port=disagg_port)
|
||||||
print(response)
|
print(response)
|
||||||
# kill gen1, the request should fail
|
# kill gen1, the request should fail
|
||||||
terminate(gen_worker1, release_port=False)
|
terminate(gen_worker1, release_port=True)
|
||||||
await asyncio.sleep(CHECK_STATUS_INTERVAL)
|
await asyncio.sleep(CHECK_STATUS_INTERVAL)
|
||||||
verify_cluster_info(False, 1, 0, port=disagg_port)
|
verify_cluster_info(False, 1, 0, port=disagg_port)
|
||||||
with pytest.raises(Exception):
|
with pytest.raises(Exception):
|
||||||
@ -480,7 +480,7 @@ async def test_worker_restart(model_name, disagg_server_config, worker_config,
|
|||||||
assert len(response.choices[0].text) >= 1
|
assert len(response.choices[0].text) >= 1
|
||||||
|
|
||||||
# kill ctx1, the request should fail
|
# kill ctx1, the request should fail
|
||||||
terminate(ctx_worker1, release_port=False)
|
terminate(ctx_worker1, release_port=True)
|
||||||
await asyncio.sleep(CHECK_STATUS_INTERVAL)
|
await asyncio.sleep(CHECK_STATUS_INTERVAL)
|
||||||
verify_cluster_info(False, 0, 1, port=disagg_port)
|
verify_cluster_info(False, 0, 1, port=disagg_port)
|
||||||
with pytest.raises(Exception):
|
with pytest.raises(Exception):
|
||||||
@ -500,16 +500,16 @@ async def test_worker_restart(model_name, disagg_server_config, worker_config,
|
|||||||
assert len(response.choices[0].text) >= 1
|
assert len(response.choices[0].text) >= 1
|
||||||
|
|
||||||
# start ctx1 and gen1 again, we have 2 ctxs and 2 gens now
|
# start ctx1 and gen1 again, we have 2 ctxs and 2 gens now
|
||||||
await wait_for_port_released(ctx_worker1.port)
|
|
||||||
await wait_for_port_released(gen_worker1.port)
|
|
||||||
ctx_worker1 = run_ctx_worker(model_name,
|
ctx_worker1 = run_ctx_worker(model_name,
|
||||||
worker_config,
|
worker_config,
|
||||||
work_dir,
|
work_dir,
|
||||||
port=ctx_worker1.port)
|
port=0,
|
||||||
|
device=0)
|
||||||
gen_worker1 = run_gen_worker(model_name,
|
gen_worker1 = run_gen_worker(model_name,
|
||||||
worker_config,
|
worker_config,
|
||||||
work_dir,
|
work_dir,
|
||||||
port=gen_worker1.port)
|
port=0,
|
||||||
|
device=1)
|
||||||
await wait_for_worker_ready(ctx_worker1.port)
|
await wait_for_worker_ready(ctx_worker1.port)
|
||||||
await wait_for_worker_ready(gen_worker1.port)
|
await wait_for_worker_ready(gen_worker1.port)
|
||||||
await asyncio.sleep(CHECK_STATUS_INTERVAL)
|
await asyncio.sleep(CHECK_STATUS_INTERVAL)
|
||||||
@ -556,6 +556,7 @@ async def test_disagg_server_restart(model_name, disagg_server_config,
|
|||||||
terminate(disagg_server)
|
terminate(disagg_server)
|
||||||
# wait for the port to be released, so we can rebind the new process to the same port
|
# wait for the port to be released, so we can rebind the new process to the same port
|
||||||
await wait_for_port_released(disagg_port)
|
await wait_for_port_released(disagg_port)
|
||||||
|
await asyncio.sleep(CHECK_STATUS_INTERVAL)
|
||||||
|
|
||||||
with pytest.raises(requests.exceptions.RequestException):
|
with pytest.raises(requests.exceptions.RequestException):
|
||||||
verify_cluster_info(False,
|
verify_cluster_info(False,
|
||||||
|
|||||||
@ -116,17 +116,12 @@ l0_dgx_h100:
|
|||||||
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_multi_instance[MMLU]
|
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_multi_instance[MMLU]
|
||||||
- accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_bf16_4gpu[tp4ep4_cudagraph_overlap]
|
- accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_bf16_4gpu[tp4ep4_cudagraph_overlap]
|
||||||
- disaggregated/test_auto_scaling.py::test_service_discovery[etcd-round_robin]
|
- disaggregated/test_auto_scaling.py::test_service_discovery[etcd-round_robin]
|
||||||
- disaggregated/test_auto_scaling.py::test_service_discovery[etcd-load_balancing]
|
|
||||||
- disaggregated/test_auto_scaling.py::test_worker_restart[etcd-round_robin]
|
|
||||||
- disaggregated/test_auto_scaling.py::test_worker_restart[etcd-load_balancing]
|
- disaggregated/test_auto_scaling.py::test_worker_restart[etcd-load_balancing]
|
||||||
- disaggregated/test_auto_scaling.py::test_minimal_instances[etcd-round_robin]
|
- disaggregated/test_auto_scaling.py::test_minimal_instances[etcd-round_robin]
|
||||||
- disaggregated/test_auto_scaling.py::test_disagg_server_restart[etcd-round_robin]
|
- disaggregated/test_auto_scaling.py::test_disagg_server_restart[etcd-round_robin]
|
||||||
- disaggregated/test_auto_scaling.py::test_service_discovery[http-round_robin]
|
- disaggregated/test_auto_scaling.py::test_service_discovery[http-round_robin]
|
||||||
- disaggregated/test_auto_scaling.py::test_service_discovery[http-load_balancing]
|
|
||||||
- disaggregated/test_auto_scaling.py::test_service_discovery[http-kv_cache_aware]
|
- disaggregated/test_auto_scaling.py::test_service_discovery[http-kv_cache_aware]
|
||||||
- disaggregated/test_auto_scaling.py::test_worker_restart[http-round_robin]
|
|
||||||
- disaggregated/test_auto_scaling.py::test_worker_restart[http-load_balancing]
|
- disaggregated/test_auto_scaling.py::test_worker_restart[http-load_balancing]
|
||||||
- disaggregated/test_auto_scaling.py::test_worker_restart[http-kv_cache_aware]
|
|
||||||
- disaggregated/test_auto_scaling.py::test_minimal_instances[http-round_robin]
|
- disaggregated/test_auto_scaling.py::test_minimal_instances[http-round_robin]
|
||||||
- disaggregated/test_auto_scaling.py::test_disagg_server_restart[http-round_robin]
|
- disaggregated/test_auto_scaling.py::test_disagg_server_restart[http-round_robin]
|
||||||
- condition:
|
- condition:
|
||||||
|
|||||||
@ -454,10 +454,6 @@ accuracy/test_cli_flow.py::TestPhi3Small128kInstruct::test_auto_dtype SKIP (http
|
|||||||
accuracy/test_cli_flow.py::TestPhi3_5MiniInstruct::test_auto_dtype SKIP (https://nvbugs/5744293)
|
accuracy/test_cli_flow.py::TestPhi3_5MiniInstruct::test_auto_dtype SKIP (https://nvbugs/5744293)
|
||||||
unittest/_torch/auto_deploy/unit/singlegpu/models/test_llama4_vlm_patch.py::test_build_run_llama4_vlm SKIP (https://nvbugs/5747878)
|
unittest/_torch/auto_deploy/unit/singlegpu/models/test_llama4_vlm_patch.py::test_build_run_llama4_vlm SKIP (https://nvbugs/5747878)
|
||||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True-moe_backend=TRTLLM] SKIP (https://nvbugs/5740377)
|
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True-moe_backend=TRTLLM] SKIP (https://nvbugs/5740377)
|
||||||
disaggregated/test_auto_scaling.py::test_minimal_instances[etcd-round_robin] SKIP (https://nvbugs/5748564)
|
|
||||||
disaggregated/test_auto_scaling.py::test_service_discovery[etcd-load_balancing] SKIP (https://nvbugs/5757415)
|
|
||||||
disaggregated/test_auto_scaling.py::test_service_discovery[http-kv_cache_aware] SKIP (https://nvbugs/5758225)
|
|
||||||
unittest/llmapi/apps/test_disagg_serving_perf_metrics.py SKIP (https://nvbugs/5752516)
|
|
||||||
unittest/_torch/attention/test_trtllm_flashinfer_symbol_collision.py::test_flashinfer_fused_moe_matches_torch_moe SKIP (https://nvbugs/5752521)
|
unittest/_torch/attention/test_trtllm_flashinfer_symbol_collision.py::test_flashinfer_fused_moe_matches_torch_moe SKIP (https://nvbugs/5752521)
|
||||||
cpp/test_multi_gpu.py::TestDisagg::test_symmetric_executor[gpt-2proc-mpi_kvcache-90] SKIP (https://nvbugs/5755941)
|
cpp/test_multi_gpu.py::TestDisagg::test_symmetric_executor[gpt-2proc-mpi_kvcache-90] SKIP (https://nvbugs/5755941)
|
||||||
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=0] SKIP (https://nvbugs/5748600)
|
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=0] SKIP (https://nvbugs/5748600)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user