mirror of https://github.com/NVIDIA/TensorRT-LLM.git
[None][chore] fix llmargs conflict (#8152)
Signed-off-by: Yan Chunwei <328693+Superjomn@users.noreply.github.com>
commit 54ab9767b5 (parent fba351a211)
@@ -1550,7 +1550,7 @@ class BaseLlmArgs(StrictBaseModel):
         description="Return perf metrics.",
         status="prototype")

-    orchestrator_type: Optional[Literal["rpc"]] = Field(
+    orchestrator_type: Optional[Literal["rpc", "ray"]] = Field(
         default=None,
         description=
         "The orchestrator type to use. Defaults to None, which uses MPI.",
@@ -2444,13 +2444,6 @@ class TorchLlmArgs(BaseLlmArgs):
         status="prototype",
     )

-    orchestrator_type: Optional[Literal["ray"]] = Field(
-        default=None,
-        description=
-        "The orchestrator type to use. Options: 'ray'. Defaults to None, which uses MPI.",
-        status="prototype",
-    )
-
     # PrivateVars
     _quant_config: Optional[QuantConfig] = PrivateAttr(default=None)
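The two hunks above resolve the conflict by keeping a single orchestrator_type field on BaseLlmArgs that accepts both "rpc" and "ray", and deleting the duplicate "ray"-only field from TorchLlmArgs. A minimal sketch of how such an Optional[Literal[...]] field behaves, assuming plain pydantic v2 (the project's Field also carries a status="prototype" marker, omitted here):

    from typing import Literal, Optional

    from pydantic import BaseModel, Field, ValidationError


    class ArgsSketch(BaseModel):
        # None means "use MPI"; "rpc" and "ray" select an alternative orchestrator.
        orchestrator_type: Optional[Literal["rpc", "ray"]] = Field(
            default=None,
            description="The orchestrator type to use. Defaults to None, which uses MPI.")


    print(ArgsSketch().orchestrator_type)                         # None -> MPI path
    print(ArgsSketch(orchestrator_type="ray").orchestrator_type)  # "ray"
    try:
        ArgsSketch(orchestrator_type="grpc")                      # not in the Literal
    except ValidationError as err:
        print(err.errors()[0]["type"])                            # "literal_error"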
@@ -75,10 +75,6 @@ methods:
         annotation: Optional[str]
         default: null
         status: deprecated
-      orchestrator_type:
-        annotation: Optional[Literal['ray']]
-        default: null
-        status: prototype
       build_config:
         annotation: Optional[tensorrt_llm.llmapi.llm_args.BuildConfig]
         default: null
@@ -184,7 +180,7 @@ methods:
         default: False
         status: prototype
       orchestrator_type:
-        annotation: Optional[Literal["rpc"]]
+        annotation: Optional[Literal["rpc", "ray"]]
         default: null
         status: prototype
     return_annotation: None
@@ -8,6 +8,7 @@ from tensorrt_llm.lora_helper import LoraConfig
 from .lora_test_utils import check_llama_7b_multi_lora_from_request_test_harness
 from .test_llm_pytorch import llama_7b_lora_from_dir_test_harness
 from .test_llm import _test_llm_capture_request_error
+from utils.util import skip_ray
 # isort: on
 from tensorrt_llm.executor.rpc_proxy import GenerationExecutorRpcProxy
 from tensorrt_llm.sampling_params import SamplingParams
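The test hunks below stack an unconditional pytest skip (with a tracking-bug URL) on top of the skip_ray marker imported above. A minimal sketch of that decorator pattern; skip_ray here is a local stand-in, not the real helper from utils.util:

    import pytest

    # Stand-in for utils.util.skip_ray; the real marker's condition is not shown in this diff.
    skip_ray = pytest.mark.skipif(False, reason="stand-in for the repo's skip_ray marker")


    @pytest.mark.skip(reason="https://nvbugs/5560921")
    @skip_ray
    def test_llm_rpc_sketch():
        pass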
@@ -61,6 +62,8 @@ def test_llama_7b_multi_lora_tp2():
         cuda_graph_config=None)


+@pytest.mark.skip(reason="https://nvbugs/5560921")
+@skip_ray
 @pytest.mark.gpu2
 def test_llm_rpc_tp2():
     with LLM(model=llama_model_path,
@@ -78,6 +81,8 @@ def test_llm_rpc_tp2():
         assert len(res.outputs[0].token_ids) == 10


+@pytest.mark.skip(reason="https://nvbugs/5560921")
+@skip_ray
 @pytest.mark.gpu2
 @pytest.mark.asyncio
 async def test_llm_rpc_streaming_tp2():
@@ -956,6 +956,8 @@ class TestLlmError:
         llm.generate([ids])


+@pytest.mark.skip(reason="https://nvbugs/5560921")
+@skip_ray
 def test_llm_rpc():
     # TODO: remove the with-statement when shutdown hang issue is fixed
     with LLM(model=llama_model_path,
@@ -972,6 +974,8 @@ def test_llm_rpc():
         assert len(res.outputs[0].token_ids) == 10


+@pytest.mark.skip(reason="https://nvbugs/5560921")
+@skip_ray
 @pytest.mark.asyncio
 async def test_llm_rpc_streaming():
     # TODO: remove the with-statement when shutdown hang issue is fixed
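With the conflict resolved, orchestrator selection lives on BaseLlmArgs only. A hedged usage sketch, assuming orchestrator_type is accepted as an LLM constructor keyword like the other fields in the API-stability reference above (the model path is a placeholder):

    from tensorrt_llm import LLM

    # None (the default) uses MPI; "rpc" and "ray" are the orchestrator options after this change.
    llm = LLM(model="/path/to/llama-model", orchestrator_type="ray")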