mirror of https://github.com/NVIDIA/TensorRT-LLM.git
[None][chore] fix llmargs conflict (#8152)
Signed-off-by: Yan Chunwei <328693+Superjomn@users.noreply.github.com>
commit 54ab9767b5 (parent fba351a211)
@@ -1550,7 +1550,7 @@ class BaseLlmArgs(StrictBaseModel):
         description="Return perf metrics.",
         status="prototype")

-    orchestrator_type: Optional[Literal["rpc"]] = Field(
+    orchestrator_type: Optional[Literal["rpc", "ray"]] = Field(
         default=None,
         description=
         "The orchestrator type to use. Defaults to None, which uses MPI.",
@@ -2444,13 +2444,6 @@ class TorchLlmArgs(BaseLlmArgs):
         status="prototype",
     )

-    orchestrator_type: Optional[Literal["ray"]] = Field(
-        default=None,
-        description=
-        "The orchestrator type to use. Options: 'ray'. Defaults to None, which uses MPI.",
-        status="prototype",
-    )
-
     # PrivateVars
     _quant_config: Optional[QuantConfig] = PrivateAttr(default=None)
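The two hunks above resolve the conflict by keeping a single orchestrator_type field on BaseLlmArgs that accepts both "rpc" and "ray", and deleting the duplicate "ray"-only field from TorchLlmArgs. A minimal sketch of how such an Optional[Literal[...]] field behaves, assuming plain pydantic v2 (the project's Field also carries a status="prototype" marker, omitted here):

    from typing import Literal, Optional

    from pydantic import BaseModel, Field, ValidationError


    class ArgsSketch(BaseModel):
        # None means "use MPI"; "rpc" and "ray" select an alternative orchestrator.
        orchestrator_type: Optional[Literal["rpc", "ray"]] = Field(
            default=None,
            description="The orchestrator type to use. Defaults to None, which uses MPI.")


    print(ArgsSketch().orchestrator_type)                         # None -> MPI path
    print(ArgsSketch(orchestrator_type="ray").orchestrator_type)  # "ray"
    try:
        ArgsSketch(orchestrator_type="grpc")                      # not in the Literal
    except ValidationError as err:
        print(err.errors()[0]["type"])                            # "literal_error"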
@@ -75,10 +75,6 @@ methods:
         annotation: Optional[str]
         default: null
         status: deprecated
-      orchestrator_type:
-        annotation: Optional[Literal['ray']]
-        default: null
-        status: prototype
       build_config:
         annotation: Optional[tensorrt_llm.llmapi.llm_args.BuildConfig]
         default: null
@@ -184,7 +180,7 @@ methods:
         default: False
         status: prototype
       orchestrator_type:
-        annotation: Optional[Literal["rpc"]]
+        annotation: Optional[Literal["rpc", "ray"]]
         default: null
         status: prototype
     return_annotation: None
@@ -8,6 +8,7 @@ from tensorrt_llm.lora_helper import LoraConfig
 from .lora_test_utils import check_llama_7b_multi_lora_from_request_test_harness
 from .test_llm_pytorch import llama_7b_lora_from_dir_test_harness
 from .test_llm import _test_llm_capture_request_error
+from utils.util import skip_ray
 # isort: on
 from tensorrt_llm.executor.rpc_proxy import GenerationExecutorRpcProxy
 from tensorrt_llm.sampling_params import SamplingParams
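The test hunks below stack an unconditional pytest skip (with a tracking-bug URL) on top of the skip_ray marker imported above. A minimal sketch of that decorator pattern; skip_ray here is a local stand-in, not the real helper from utils.util:

    import pytest

    # Stand-in for utils.util.skip_ray; the real marker's condition is not shown in this diff.
    skip_ray = pytest.mark.skipif(False, reason="stand-in for the repo's skip_ray marker")


    @pytest.mark.skip(reason="https://nvbugs/5560921")
    @skip_ray
    def test_llm_rpc_sketch():
        pass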
@@ -61,6 +62,8 @@ def test_llama_7b_multi_lora_tp2():
         cuda_graph_config=None)


+@pytest.mark.skip(reason="https://nvbugs/5560921")
+@skip_ray
 @pytest.mark.gpu2
 def test_llm_rpc_tp2():
     with LLM(model=llama_model_path,
@@ -78,6 +81,8 @@ def test_llm_rpc_tp2():
         assert len(res.outputs[0].token_ids) == 10


+@pytest.mark.skip(reason="https://nvbugs/5560921")
+@skip_ray
 @pytest.mark.gpu2
 @pytest.mark.asyncio
 async def test_llm_rpc_streaming_tp2():
@@ -956,6 +956,8 @@ class TestLlmError:
         llm.generate([ids])


+@pytest.mark.skip(reason="https://nvbugs/5560921")
+@skip_ray
 def test_llm_rpc():
     # TODO: remove the with-statement when shutdown hang issue is fixed
     with LLM(model=llama_model_path,
@@ -972,6 +974,8 @@ def test_llm_rpc():
         assert len(res.outputs[0].token_ids) == 10


+@pytest.mark.skip(reason="https://nvbugs/5560921")
+@skip_ray
 @pytest.mark.asyncio
 async def test_llm_rpc_streaming():
     # TODO: remove the with-statement when shutdown hang issue is fixed
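With the conflict resolved, orchestrator selection lives on BaseLlmArgs only. A hedged usage sketch, assuming orchestrator_type is accepted as an LLM constructor keyword like the other fields in the API-stability reference above (the model path is a placeholder):

    from tensorrt_llm import LLM

    # None (the default) uses MPI; "rpc" and "ray" are the orchestrator options after this change.
    llm = LLM(model="/path/to/llama-model", orchestrator_type="ray")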