mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[bug] AsyncScheduler drops first post-resume token after pause_generation + clear_cache (#42117)
Signed-off-by: hao-aaron <ahao@anyscale.com> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
This commit is contained in:
@@ -227,9 +227,6 @@ llm_kwargs = dict(
|
||||
attention_backend=ATTN_BACKEND,
|
||||
gpu_memory_utilization=0.75,
|
||||
weight_transfer_config=WeightTransferConfig(backend="nccl"),
|
||||
# TODO(haosdent): re-enable once #42043 is fixed. Both LLM
|
||||
# instances must match.
|
||||
async_scheduling=False,
|
||||
)
|
||||
llm_kwargs.update(rocm_determinism_kwargs)
|
||||
|
||||
@@ -371,9 +368,6 @@ llm_v2_kwargs = dict(
|
||||
gpu_memory_utilization=0.75,
|
||||
distributed_executor_backend="ray",
|
||||
attention_backend=ATTN_BACKEND,
|
||||
# TODO(haosdent): re-enable once #42043 is fixed. Both LLM
|
||||
# instances must match.
|
||||
async_scheduling=False,
|
||||
)
|
||||
llm_v2_kwargs.update(rocm_determinism_kwargs)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user