Mirror of https://github.com/NVIDIA/TensorRT-LLM.git
[None][chore] Remove two unused parameters in create_py_executor (#7458)
Signed-off-by: leslie-fang25 <leslief@nvidia.com>
Parent: 5ff3a65b23
Commit: bd9ba97d89
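The change removes the logits_post_processor_config and parallel_config parameters from create_py_executor and deletes the worker-side code that built and passed them. The first two hunks edit the module that defines create_py_executor (tensorrt_llm/_torch/pyexecutor/py_executor_creator.py, as the import in the worker hunk confirms); the last two edit GenerationExecutorWorker. Note that an unconditional "_ = _get_comm_ranks_device_id()" call is added before the backend dispatch while the value-consuming call is removed, which suggests the helper is being kept for its side effects. Hedged usage sketches follow the relevant hunks below.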
@@ -14,9 +14,7 @@ from tensorrt_llm._torch.pyexecutor.resource_manager import ResourceManagerType
 from tensorrt_llm._utils import get_sm_version
 from tensorrt_llm.bindings.executor import (CapacitySchedulerPolicy,
                                             ContextChunkingPolicy,
-                                            ExecutorConfig,
-                                            LogitsPostProcessorConfig,
-                                            ParallelConfig)
+                                            ExecutorConfig)
 from tensorrt_llm.bindings.internal.batch_manager import ContextChunkingConfig
 from tensorrt_llm.llmapi.llm_args import KvCacheConnectorConfig, TorchLlmArgs
 from tensorrt_llm.llmapi.tokenizer import TokenizerBase
@@ -217,14 +215,9 @@ def create_py_executor(
     tokenizer: Optional[TokenizerBase] = None,
     lora_config: Optional[LoraConfig] = None,
     kv_connector_config: Optional[KvCacheConnectorConfig] = None,
-    logits_post_processor_config: Optional[LogitsPostProcessorConfig] = None,
-    parallel_config: Optional[ParallelConfig] = None,
 ) -> PyExecutor:
 
     executor_config = llm_args.get_executor_config(checkpoint_dir, tokenizer)
-    executor_config.logits_post_processor_config = logits_post_processor_config
-    executor_config.parallel_config = parallel_config
-
     garbage_collection_gen0_threshold = llm_args.garbage_collection_gen0_threshold
 
     _mangle_executor_config(executor_config)
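As an illustration only, here is a minimal sketch of a call to the slimmed-down create_py_executor. The parameters ahead of tokenizer (llm_args, checkpoint_dir) are assumed from the function body shown above, and all values are placeholders rather than anything taken from the commit:

    # Hedged sketch, not from the commit: llm_args is a TorchLlmArgs built
    # elsewhere; the checkpoint path is a placeholder.
    from tensorrt_llm._torch.pyexecutor.py_executor_creator import \
        create_py_executor

    executor = create_py_executor(
        llm_args=llm_args,
        checkpoint_dir="/path/to/checkpoint",  # placeholder
        tokenizer=None,
        lora_config=None,
        kv_connector_config=None,
        # Passing logits_post_processor_config= or parallel_config= here
        # would now fail with a TypeError: both keywords were removed.
    )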
@@ -113,6 +113,7 @@ class GenerationExecutorWorker(GenerationExecutor):
             assert hasattr(
                 self.llm_args, "backend"
             ), "llm_args should be with backend in _create_py_executor"
+            _ = _get_comm_ranks_device_id()
             if self.llm_args.backend == "pytorch":
                 from tensorrt_llm._torch.pyexecutor.py_executor_creator import \
                     create_py_executor
@@ -122,13 +123,6 @@ class GenerationExecutorWorker(GenerationExecutor):
                 args["tokenizer"] = tokenizer
                 args["lora_config"] = lora_config
                 args["kv_connector_config"] = kv_connector_config
-                args[
-                    "logits_post_processor_config"] = tllm.LogitsPostProcessorConfig(
-                        processor_batched=batched_logits_processor,
-                        replicate=False)
-                comm_ranks, device_ids = _get_comm_ranks_device_id()
-                args["parallel_config"] = tllm.ParallelConfig(
-                    participant_ids=comm_ranks, device_ids=device_ids)
             elif self.llm_args.backend == "_autodeploy":
                 from tensorrt_llm._torch.auto_deploy.llm_args import \
                     LlmArgs as ADLlmArgs
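Pieced together from the context lines above, the worker's backend dispatch after this change might read roughly as follows. This is a hedged reconstruction, not the actual file contents; "..." marks elided code and the fragment is not runnable on its own:

    # Hedged reconstruction from the diff context only.
    _ = _get_comm_ranks_device_id()  # return value discarded; presumably
                                     # called only for its side effects now
    if self.llm_args.backend == "pytorch":
        from tensorrt_llm._torch.pyexecutor.py_executor_creator import \
            create_py_executor
        ...
        args["tokenizer"] = tokenizer
        args["lora_config"] = lora_config
        args["kv_connector_config"] = kv_connector_config
        # The logits_post_processor_config / parallel_config entries are
        # gone: create_py_executor no longer accepts them.
    elif self.llm_args.backend == "_autodeploy":
        from tensorrt_llm._torch.auto_deploy.llm_args import \
            LlmArgs as ADLlmArgs
        ...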