[bug] AsyncScheduler drops first post-resume token after pause_generation + clear_cache (#42117)

Signed-off-by: hao-aaron <ahao@anyscale.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
2026-06-06 00:16:14 +00:00 · 2026-05-19 01:06:21 -07:00
parent b14be81c1f
commit f34623bf3c
4 changed files with 13 additions and 15 deletions
@@ -227,9 +227,6 @@ llm_kwargs = dict(
    attention_backend=ATTN_BACKEND,
    gpu_memory_utilization=0.75,
    weight_transfer_config=WeightTransferConfig(backend="nccl"),
-    # TODO(haosdent): re-enable once #42043 is fixed. Both LLM
-    # instances must match.
-    async_scheduling=False,
 )
 llm_kwargs.update(rocm_determinism_kwargs)

@@ -371,9 +368,6 @@ llm_v2_kwargs = dict(
    gpu_memory_utilization=0.75,
    distributed_executor_backend="ray",
    attention_backend=ATTN_BACKEND,
-    # TODO(haosdent): re-enable once #42043 is fixed. Both LLM
-    # instances must match.
-    async_scheduling=False,
 )
 llm_v2_kwargs.update(rocm_determinism_kwargs)