[https://nvbugs/5448437][fix] Fix NIXL disaggregated tests: run Hopper-only, drop MMLU from the Qwen3-8B check, raise max_seq_len to 384 (#6940)

Signed-off-by: Bo Deng <deemod@nvidia.com>
2026-01-14 06:27:45 +08:00 · 2025-08-20 14:19:48 +08:00 · 2025-08-20 14:19:48 +08:00 · df00c81aea
commit df00c81aea
parent fae43e7b46
4 changed files with 10 additions and 11 deletions
--- a/tests/integration/defs/accuracy/test_disaggregated_serving.py
+++ b/tests/integration/defs/accuracy/test_disaggregated_serving.py
@ -21,7 +21,7 @@ from tensorrt_llm.llmapi import CompletionOutput, RequestOutput, SamplingParams
 from tensorrt_llm.llmapi.llm_args import LlmArgs

 from ..conftest import (get_device_count, llm_models_root, parametrize_with_ids,
-                        skip_pre_hopper)
+                        skip_no_hopper, skip_pre_hopper)
 from ..trt_test_alternative import popen
 from .accuracy_core import (GSM8K, MMLU, LlmapiAccuracyTestHarness,
                            get_accuracy_task)
@ -508,6 +508,7 @@ class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness):

    @pytest.mark.skip_less_device(2)
    @pytest.mark.skip_less_device_memory(60000)
+    @skip_no_hopper
    def test_nixl_backend(self):
        ctx_server_config = {
            "disable_overlap_scheduler": True,
@ -642,6 +643,7 @@ class TestQwen3_8B(LlmapiAccuracyTestHarness):
    MODEL_PATH = f"{llm_models_root()}/Qwen3/Qwen3-8B-FP8"

    @pytest.mark.skip_less_device(2)
+    @skip_no_hopper
    def test_nixl_backend(self):
        ctx_server_config = {
            "disable_overlap_scheduler": True,
@ -673,8 +675,6 @@ class TestQwen3_8B(LlmapiAccuracyTestHarness):
        with launch_disaggregated_llm(disaggregated_server_config,
                                      ctx_server_config, gen_server_config,
                                      self.MODEL_PATH) as llm:
-            task = MMLU(self.MODEL_NAME)
-            task.evaluate(llm)
            task = GSM8K(self.MODEL_NAME)
            task.evaluate(llm)

--- a/tests/integration/defs/disaggregated/test_disaggregated.py
+++ b/tests/integration/defs/disaggregated/test_disaggregated.py
@ -20,7 +20,8 @@ import tempfile

 import pytest
 import yaml
-from defs.conftest import llm_models_root, skip_arm, skip_no_hopper
+from defs.conftest import (get_sm_version, llm_models_root, skip_arm,
+                           skip_no_hopper)
 from defs.trt_test_alternative import check_call, check_output, popen

 from tensorrt_llm.logger import logger
@ -1212,7 +1213,7 @@ def get_config_for_benchmark(model_root, backend):
            "num_instances": 1,
            "max_batch_size": 2,
            "max_num_tokens": 384,
-            "max_seq_len": 320,
+            "max_seq_len": 384,
            "tensor_parallel_size": 1,
            "pipeline_parallel_size": 1,
            "disable_overlap_scheduler": True,
@ -1228,7 +1229,7 @@ def get_config_for_benchmark(model_root, backend):
            "pipeline_parallel_size": 1,
            "max_batch_size": 2,
            "max_num_tokens": 384,
-            "max_seq_len": 320,
+            "max_seq_len": 384,
            "cache_transceiver_config": {
                "backend": backend,
                "max_tokens_in_buffer": 512,
@ -1247,6 +1248,9 @@ def get_config_for_benchmark(model_root, backend):
 def test_disaggregated_benchmark_on_diff_backends(
        disaggregated_test_root, disaggregated_example_root, llm_venv,
        benchmark_model_root, benchmark_root, shared_gpt_path):
+    if "DeepSeek-V3-Lite" in benchmark_model_root and "fp8" in benchmark_model_root and get_sm_version(
+    ) != 90:
+        pytest.skip("The test should only run on Hopper")
    nixl_config = get_config_for_benchmark(benchmark_model_root, "nixl")
    ucx_config = get_config_for_benchmark(benchmark_model_root, "ucx")
    temp_dir = tempfile.TemporaryDirectory()
--- a/tests/integration/test_lists/test-db/l0_dgx_b200.yml
+++ b/tests/integration/test_lists/test-db/l0_dgx_b200.yml
@ -72,6 +72,3 @@ l0_dgx_b200:
  - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-bf16]
  - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf]
  - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-3.1-8b-instruct-hf-fp8]
-  - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-fp8]
-  - accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend
-  - accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@ -274,8 +274,6 @@ examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-2-9b-it] SK
 examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-2-27b-it] SKIP (https://nvbugs/5434451)
 examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-3-1b-it] SKIP (https://nvbugs/5434451)
 accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm_eagle3] SKIP (https://nvbugs/5437384)
-accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend SKIP (https://nvbugs/5448437)
-disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-fp8] SKIP (https://nvbugs/5448449)
 test_e2e.py::test_ptp_quickstart_multimodal[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image-False] SKIP (https://nvbugs/5444095)
 full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen1.5_7b_chat-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5247837)
 full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen2_7b_instruct-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5247837)