diff --git a/tests/integration/defs/stress_test/stress_test.py b/tests/integration/defs/stress_test/stress_test.py
index f81f0ab2bd..f5fed7b0eb 100644
--- a/tests/integration/defs/stress_test/stress_test.py
+++ b/tests/integration/defs/stress_test/stress_test.py
@@ -32,6 +32,7 @@ import contextlib
 import json
 import os
 import re
+import socket
 import subprocess
 import tempfile
 import threading
@@ -44,7 +45,7 @@ import pandas as pd
 import pytest
 import requests
 import yaml
-from defs.common import parse_gsm8k_output
+from defs.common import get_free_port_in_ci, parse_gsm8k_output
 from defs.conftest import get_device_count, get_device_memory, llm_models_root
 from defs.trt_test_alternative import (Popen, cleanup_process_tree, print_info,
                                        print_warning)
@@ -72,10 +73,18 @@ from defs.trt_test_alternative import (Popen, cleanup_process_tree, print_info,
 GRACEFUL_TERMINATION_TIMEOUT = 300  # seconds - set longer when stress large model
 
 
+def _get_default_port() -> int:
+    """Get a default port using CI allocation if available, otherwise use 8000."""
+    try:
+        return get_free_port_in_ci()
+    except Exception:
+        return 8000
+
+
 @dataclass(frozen=True)
 class ServerConfig:
     """Dataclass to store server configuration for trtllm-serve"""
-    port: int = 8000
+    port: int = field(default_factory=_get_default_port)
     host: str = "localhost"
     pp_size: int = 1
     ep_size: Optional[int] = 1
@@ -167,8 +176,7 @@ class PerformanceParams:
     # Ensure indefinite runs specially for different concurrency values
     test_timeout: int = 3600  # 1 hours for tinyllama and llama-v3-8b-instruct-hf
     concurrency_list: List[int] = field(
-        default_factory=lambda:
-        [8, 16, 32, 64, 128, 256, 384, 512, 640, 768, 896, 1024])
+        default_factory=lambda: [8, 16, 32, 64, 128, 256])
 
     @property
     def request_count_list(self) -> List[int]:
@@ -341,6 +349,26 @@ def check_server_health(server_url: str,
         return False, f"Unexpected error during health check: {str(e)}"
 
 
+def is_port_available(port: int,
+                      host: str = "localhost") -> Tuple[bool, Optional[str]]:
+    """
+    Check if a port is available for binding.
+
+    Args:
+        port: Port number to check
+        host: Host to bind to
+
+    Returns:
+        Tuple of (is_available, error_message)
+    """
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        try:
+            s.bind((host, port))
+            return True, None
+        except OSError as e:
+            return False, f"Port {port} is already in use on {host}: {e}"
+
+
 @pytest.mark.parametrize(
     "test_mode",
     ["stress-test", "stress-stage-alone", "stress-test-with-accuracy"],
@@ -519,6 +547,10 @@ def stress_test(config,
     else:
         stress_config = None
 
+    # Check if port is available
+    is_available, port_error = is_port_available(test_server_config.port,
+                                                 test_server_config.host)
+
     # Check if server is already running
     is_healthy, _ = check_server_health(test_server_config.url,
                                         test_server_config.health_check_timeout)
@@ -530,6 +562,9 @@
     # Start server
     print_info("Starting trtllm-serve server...")
     print_info(f"Model path: {model_path}")
+    print_info(
+        f"Server port: {test_server_config.port} (allocated via CI port mechanism)"
+    )
 
     # Verify that model path exists
     if not os.path.exists(model_path):
@@ -552,7 +587,7 @@
         extra_llm_options.update({
             "cuda_graph_config": {
                 "enable_padding": True,
-                "batch_sizes": [1, 2, 4, 8, 16, 32, 64, 128, 256, 384],
+                "batch_sizes": [1, 2, 4, 8, 16, 32, 64, 128],
             },
             "print_iter_log": True,
         })
@@ -759,6 +794,7 @@ def create_aiperf_command(model_name,
                           model_path,
                           request_count,
                           concurrency,
+                          server_url,
                           input_len_mean=PerformanceParams.input_len_mean,
                           input_len_std=PerformanceParams.input_len_std,
                           output_len_mean=PerformanceParams.output_len_mean,
@@ -772,6 +808,7 @@ def create_aiperf_command(model_name,
         model_path: Path to the model
         request_count: Number of requests to send
         concurrency: Number of concurrent requests
+        server_url: URL of the server (e.g., "localhost:8000")
         input_len_mean: Mean input length
         input_len_std: Standard deviation of input length
         output_len_mean: Mean output length
@@ -790,6 +827,8 @@ def create_aiperf_command(model_name,
         model_path,
         "--endpoint-type",
         "completions",
+        "-u",
+        server_url,
         "--random-seed",
         "123",
         "--synthetic-input-tokens-mean",
@@ -928,6 +967,7 @@ def measure_capacity_stage(model_name,
         model_path=model_path,
         request_count=request_count,
         concurrency=concurrency,
+        server_url=f"{server_config.host}:{server_config.port}",
         input_len_mean=performance_params.input_len_mean,
         input_len_std=performance_params.input_len_std,
         output_len_mean=performance_params.output_len_mean,
@@ -1023,6 +1063,7 @@ def stress_stage(model_name,
         model_path=model_path,
         request_count=request_count,
         concurrency=stress_concurrency,
+        server_url=f"{server_config.host}:{server_config.port}",
         input_len_mean=PerformanceParams.input_len_mean,
         input_len_std=PerformanceParams.input_len_std,
         output_len_mean=PerformanceParams.output_len_mean,
diff --git a/tests/integration/test_lists/test-db/l0_a10.yml b/tests/integration/test_lists/test-db/l0_a10.yml
index c8dc811a37..3d8228b479 100644
--- a/tests/integration/test_lists/test-db/l0_a10.yml
+++ b/tests/integration/test_lists/test-db/l0_a10.yml
@@ -242,8 +242,8 @@ l0_a10:
   - accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=True-chunked_context=False-typical_acceptance=True] # 5 mins
   - accuracy/test_llm_api.py::TestEagleVicuna_7B_v1_3::test_auto_dtype
   - accuracy/test_llm_api.py::TestEagle2Vicuna_7B_v1_3::test_auto_dtype
-  - stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-MAX_UTILIZATION-trt-stress-test]
   - stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-GUARANTEED_NO_EVICT-trt-stress-test]
+  - stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-MAX_UTILIZATION-trt-stress-test]
   - test_e2e.py::test_gpt3_175b_1layers_build_only # 6 mins
   - examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-disable_weight_only]
   - unittest/trt/model/test_mamba.py # 3 mins
@@ -263,8 +263,8 @@ l0_a10:
       stage: post_merge
       backend: pytorch
   tests:
-  - stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-MAX_UTILIZATION-pytorch-stress-test]
   - stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-GUARANTEED_NO_EVICT-pytorch-stress-test]
+  - stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-MAX_UTILIZATION-pytorch-stress-test]
 - condition:
     ranges:
       system_gpu_count:
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index 08ae4a8399..8f661504ec 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -340,7 +340,7 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[tp4-mt
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-tp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5800672)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency] SKIP (https://nvbugs/5814309)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[tp4-mtp_nextn=2-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True] SKIP (https://nvbugs/5800646)
-stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-MAX_UTILIZATION-pytorch-stress-test] SKIP (https://nvbugs/5814203)
+unittest/_torch/attention/test_trtllm_flashinfer_symbol_collision.py::test_flashinfer_fused_moe_matches_torch_moe SKIP (https://nvbugs/5814215)
 full:sm89/accuracy/test_llm_api_pytorch_multimodal.py::TestNVILA_8B::test_auto_dtype SKIP (https://nvbugs/5814504)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp2pp2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True] SKIP (https://nvbugs/5819005)
 unittest/llmapi/test_mpi_session.py::test_llmapi_launch_multiple_tasks SKIP (https://nvbugs/5819014)
@@ -384,6 +384,8 @@ perf/test_perf_sanity.py::test_e2e[aggr_upload-deepseek_r1_fp4_v2_grace_blackwel
 perf/test_perf_sanity.py::test_e2e[disagg_upload-deepseek-r1-fp4_1k1k_ctx1_gen1_dep8_bs768_eplb0_mtp0_ccb-UCX] SKIP (https://nvbugs/5819053)
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=TRTLLM-torch_compile=True] SKIP (https://nvbugs/5826604)
 disaggregated/test_disaggregated.py::test_disaggregated_mixed[TinyLlama-1.1B-Chat-v1.0] SKIP (https://nvbugs/5834212)
+examples/test_ray.py::test_llm_inference_distributed_ray[tp2pp2] SKIP (https://nvbugs/5781731)
+test_e2e.py::test_openai_chat_harmony SKIP (https://nvbugs/5819444)
 accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8 SKIP (https://nvbugs/5819452)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[ep4-mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5800646)
 accuracy/test_disaggregated_serving.py::TestQwen3_30B_A3B::test_mixed_ctx_gen_model[ctxpp2gentp2] SKIP (https://nvbugs/5748664)
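
For reviewers who want to exercise the new port-selection logic outside the harness, below is a minimal standalone sketch of the pattern the patch introduces (prefer a CI-allocated port, fall back to a fixed default, then probe the port with a bind before launching the server). The `get_free_port_in_ci` stub and the `pick_port` name are illustrative stand-ins, not the real `defs.common` helper or `_get_default_port`; the bind probe is best-effort, since a port can be taken again between the check and server startup.

# port_check_sketch.py -- standalone sketch of the port-selection pattern
# used in stress_test.py above. get_free_port_in_ci below is a hypothetical
# stub standing in for the real helper imported from defs.common.
import socket
from typing import Optional, Tuple


def get_free_port_in_ci() -> int:
    # Stub: the real helper returns a CI-allocated port; outside CI it is
    # expected to fail, which triggers the fallback in pick_port.
    raise RuntimeError("no CI port allocator available")


def pick_port(default: int = 8000) -> int:
    # Mirrors _get_default_port: prefer CI allocation, else a fixed default.
    try:
        return get_free_port_in_ci()
    except Exception:
        return default


def is_port_available(port: int,
                      host: str = "localhost") -> Tuple[bool, Optional[str]]:
    # Same bind probe as the patch. Note this is best-effort: another process
    # can still grab the port between this check and server startup (TOCTOU).
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        try:
            s.bind((host, port))
            return True, None
        except OSError as e:
            return False, f"Port {port} is already in use on {host}: {e}"


if __name__ == "__main__":
    port = pick_port()
    ok, err = is_port_available(port)
    print(f"port={port} available={ok}" + (f" ({err})" if err else ""))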