[bug] AsyncScheduler drops first post-resume token after pause_generation + clear_cache (#42117)

Signed-off-by: hao-aaron <ahao@anyscale.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
This commit is contained in:
Aaron Hao
2026-05-19 01:06:21 -07:00
committed by GitHub
parent b14be81c1f
commit f34623bf3c
4 changed files with 13 additions and 15 deletions
-6
View File
@@ -227,9 +227,6 @@ llm_kwargs = dict(
attention_backend=ATTN_BACKEND,
gpu_memory_utilization=0.75,
weight_transfer_config=WeightTransferConfig(backend="nccl"),
# TODO(haosdent): re-enable once #42043 is fixed. Both LLM
# instances must match.
async_scheduling=False,
)
llm_kwargs.update(rocm_determinism_kwargs)
@@ -371,9 +368,6 @@ llm_v2_kwargs = dict(
gpu_memory_utilization=0.75,
distributed_executor_backend="ray",
attention_backend=ATTN_BACKEND,
# TODO(haosdent): re-enable once #42043 is fixed. Both LLM
# instances must match.
async_scheduling=False,
)
llm_v2_kwargs.update(rocm_determinism_kwargs)