mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
chore: remove draft_model_engine from init parameter list of PyExecutor (#6325)
Signed-off-by: junq <22017000+QiJune@users.noreply.github.com>
This commit is contained in:
parent
1f39a11af0
commit
2fe9cc0889
@@ -411,7 +411,6 @@ def create_py_executor_instance(
|
||||
executor_config,
|
||||
ctx_chunk_config,
|
||||
model_engine,
|
||||
draft_model_engine,
|
||||
start_worker,
|
||||
sampler,
|
||||
drafter,
|
||||
@@ -551,7 +550,6 @@ def create_py_executor_instance(
|
||||
max_draft_len=spec_config.max_draft_len
|
||||
if spec_config is not None else 0,
|
||||
kv_cache_transceiver=kv_cache_transceiver,
|
||||
draft_model_engine=draft_model_engine,
|
||||
guided_decoder=guided_decoder,
|
||||
start_worker=start_worker,
|
||||
garbage_collection_gen0_threshold=garbage_collection_gen0_threshold)
|
||||
|
||||
@@ -140,7 +140,6 @@ class PyExecutor:
|
||||
max_beam_width: int = 1,
|
||||
max_draft_len: int = 0,
|
||||
kv_cache_transceiver: Optional[KvCacheTransceiver] = None,
|
||||
draft_model_engine: Optional[ModelEngine] = None,
|
||||
guided_decoder: Optional[GuidedDecoder] = None,
|
||||
garbage_collection_gen0_threshold: Optional[int] = None,
|
||||
start_worker: bool = True):
|
||||
@@ -161,13 +160,12 @@ class PyExecutor:
|
||||
self.enable_attention_dp = model_engine.enable_attention_dp
|
||||
self.sampler = sampler
|
||||
self.drafter = drafter
|
||||
self.draft_model_engine = getattr(self.drafter, "draft_model_engine",
|
||||
None)
|
||||
self.guided_decoder = guided_decoder
|
||||
self.dist = dist
|
||||
self.disable_overlap_scheduler = disable_overlap_scheduler
|
||||
|
||||
# Draft model for certain spec decode algorithms, e.g. EAGLE3
|
||||
self.draft_model_engine = draft_model_engine
|
||||
|
||||
# enqueue and _fetch_new_requests used data
|
||||
self.next_req_id = max_batch_size # The first max_batch_size request IDs are reserved for dummy requests
|
||||
self.max_beam_width = max_beam_width
|
||||
|
||||
@@ -382,7 +382,6 @@ def create_py_executor(
|
||||
executor_config=executor_config,
|
||||
ctx_chunk_config=ctx_chunk_config,
|
||||
model_engine=model_engine,
|
||||
draft_model_engine=draft_model_engine,
|
||||
start_worker=False,
|
||||
sampler=sampler,
|
||||
drafter=drafter,
|
||||
@@ -425,7 +424,6 @@ def create_py_executor(
|
||||
executor_config=executor_config,
|
||||
ctx_chunk_config=ctx_chunk_config,
|
||||
model_engine=model_engine,
|
||||
draft_model_engine=draft_model_engine,
|
||||
start_worker=False,
|
||||
sampler=sampler,
|
||||
drafter=drafter,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user