[None][chore] fix llmargs conflict (#8152)

Signed-off-by: Yan Chunwei <328693+Superjomn@users.noreply.github.com>
Yan Chunwei 2025-10-06 17:34:27 +08:00 committed by GitHub
parent fba351a211
commit 54ab9767b5
4 changed files with 11 additions and 13 deletions

View File

@@ -1550,7 +1550,7 @@ class BaseLlmArgs(StrictBaseModel):
         description="Return perf metrics.",
         status="prototype")
 
-    orchestrator_type: Optional[Literal["rpc"]] = Field(
+    orchestrator_type: Optional[Literal["rpc", "ray"]] = Field(
         default=None,
         description=
         "The orchestrator type to use. Defaults to None, which uses MPI.",
@@ -2444,13 +2444,6 @@ class TorchLlmArgs(BaseLlmArgs):
         status="prototype",
     )
 
-    orchestrator_type: Optional[Literal["ray"]] = Field(
-        default=None,
-        description=
-        "The orchestrator type to use. Options: 'ray'. Defaults to None, which uses MPI.",
-        status="prototype",
-    )
-
     # PrivateVars
     _quant_config: Optional[QuantConfig] = PrivateAttr(default=None)
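
With the duplicate field removed, orchestrator_type is declared once on BaseLlmArgs and accepts both non-MPI backends. A minimal usage sketch, assuming the field is passed straight through the LLM constructor as in the api-stability reference below (the model path and prompt are placeholders):

from tensorrt_llm import LLM
from tensorrt_llm.sampling_params import SamplingParams

# orchestrator_type=None (the default) keeps the MPI orchestrator;
# "rpc" and "ray" are the two prototype alternatives after this change.
llm = LLM(model="/path/to/llama-7b",     # placeholder model path
          orchestrator_type="ray")       # or "rpc", or None for MPI

res = llm.generate("Hello,", sampling_params=SamplingParams(max_tokens=10))
print(res.outputs[0].text)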

View File

@@ -75,10 +75,6 @@ methods:
         annotation: Optional[str]
         default: null
         status: deprecated
-      orchestrator_type:
-        annotation: Optional[Literal['ray']]
-        default: null
-        status: prototype
       build_config:
         annotation: Optional[tensorrt_llm.llmapi.llm_args.BuildConfig]
         default: null
@@ -184,7 +180,7 @@ methods:
         default: False
         status: prototype
       orchestrator_type:
-        annotation: Optional[Literal["rpc"]]
+        annotation: Optional[Literal["rpc", "ray"]]
         default: null
         status: prototype
     return_annotation: None

View File

@@ -8,6 +8,7 @@ from tensorrt_llm.lora_helper import LoraConfig
 from .lora_test_utils import check_llama_7b_multi_lora_from_request_test_harness
 from .test_llm_pytorch import llama_7b_lora_from_dir_test_harness
 from .test_llm import _test_llm_capture_request_error
+from utils.util import skip_ray
 # isort: on
 from tensorrt_llm.executor.rpc_proxy import GenerationExecutorRpcProxy
 from tensorrt_llm.sampling_params import SamplingParams
@@ -61,6 +62,8 @@ def test_llama_7b_multi_lora_tp2():
         cuda_graph_config=None)
 
 
+@pytest.mark.skip(reason="https://nvbugs/5560921")
+@skip_ray
 @pytest.mark.gpu2
 def test_llm_rpc_tp2():
     with LLM(model=llama_model_path,
@@ -78,6 +81,8 @@ def test_llm_rpc_tp2():
     assert len(res.outputs[0].token_ids) == 10
 
 
+@pytest.mark.skip(reason="https://nvbugs/5560921")
+@skip_ray
 @pytest.mark.gpu2
 @pytest.mark.asyncio
 async def test_llm_rpc_streaming_tp2():
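
These RPC cases are now gated behind a skip_ray marker imported from utils.util. Its definition is not part of this diff; what follows is only a sketch of how such a marker could be written with pytest, assuming the Ray orchestrator run is signalled through an environment variable (the variable name is hypothetical):

import os
import pytest

# Hypothetical helper: skip a test when the suite is running with the Ray
# orchestrator. The real utils.util.skip_ray may be implemented differently.
skip_ray = pytest.mark.skipif(
    os.environ.get("LLM_ORCHESTRATOR_TYPE") == "ray",  # hypothetical env var
    reason="not supported with the Ray orchestrator")

@skip_ray
def test_only_runs_without_ray():
    assert True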

View File

@@ -956,6 +956,8 @@ class TestLlmError:
         llm.generate([ids])
 
 
+@pytest.mark.skip(reason="https://nvbugs/5560921")
+@skip_ray
 def test_llm_rpc():
     # TODO: remove the with-statement when shutdown hang issue is fixed
     with LLM(model=llama_model_path,
@@ -972,6 +974,8 @@ def test_llm_rpc():
     assert len(res.outputs[0].token_ids) == 10
 
 
+@pytest.mark.skip(reason="https://nvbugs/5560921")
+@skip_ray
 @pytest.mark.asyncio
 async def test_llm_rpc_streaming():
     # TODO: remove the with-statement when shutdown hang issue is fixed
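
For orientation, this is roughly the shape of the test bodies the decorators above now guard, reconstructed from the visible context; the prompt, the orchestrator_type value, and any extra LLM kwargs beyond the 10-token assertion are assumptions:

from tensorrt_llm import LLM
from tensorrt_llm.sampling_params import SamplingParams

def run_rpc_smoke_test(llama_model_path):
    # The original tests keep the with-statement to work around a shutdown hang.
    with LLM(model=llama_model_path,
             orchestrator_type="rpc") as llm:   # assumed kwarg for the RPC path
        res = llm.generate("A B C",             # placeholder prompt
                           sampling_params=SamplingParams(max_tokens=10))
        assert len(res.outputs[0].token_ids) == 10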