From b9fe0fa7ec99ef504c42ed4d659eca496cc4a34a Mon Sep 17 00:00:00 2001
From: Leslie Fang
Date: Mon, 4 Aug 2025 13:46:07 +0800
Subject: [PATCH] [None][infra] Enable test of chunked prefill with logit post
 processor (#6483)

Signed-off-by: leslie-fang25
---
 docs/source/torch/features/feature_combination_matrix.md | 2 +-
 tests/unittest/llmapi/test_llm.py                        | 5 +++++
 tests/unittest/llmapi/test_llm_pytorch.py                | 6 ++++--
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/docs/source/torch/features/feature_combination_matrix.md b/docs/source/torch/features/feature_combination_matrix.md
index 214d37b61d..35a10a4959 100644
--- a/docs/source/torch/features/feature_combination_matrix.md
+++ b/docs/source/torch/features/feature_combination_matrix.md
@@ -14,5 +14,5 @@
 | TLLM C++ Sampler | Yes | Yes | Yes | Yes | Yes | No | No | No | No | --- | | | | |
 | KV Cache Reuse | Yes | Yes | Yes | Untested | Yes | Untested | Yes | No | Yes | Yes | --- | | | |
 | Slide Window Attention | Yes | Yes | Yes | Untested | No | Untested | Untested | Untested | Yes | Yes | WIP | --- | | |
-| Logits Post Processor | No | Yes | Yes | No | Untested | No | No | No | Yes | Yes | Yes | Yes | --- | |
+| Logits Post Processor | No | Yes | Yes | No | Yes | No | No | No | Yes | Yes | Yes | Yes | --- | |
 | Guided Decoding | Yes | Yes | Yes | No | Yes | No | No | No | Yes | Yes | Yes | Yes | Yes | --- |
diff --git a/tests/unittest/llmapi/test_llm.py b/tests/unittest/llmapi/test_llm.py
index 7f05e6e0e1..a7bc583162 100644
--- a/tests/unittest/llmapi/test_llm.py
+++ b/tests/unittest/llmapi/test_llm.py
@@ -1149,6 +1149,11 @@ def tinyllama_logits_processor_test_harness(backend=None, **llm_kwargs):
     sampling_params = SamplingParams(
         max_tokens=6, logits_processor=MyLogitsProcessor(biased_word_id))
 
+    prompts = ["A B C"]
+    if llm_kwargs.get('enable_chunked_prefill', None):
+        prompts[0] = prompts[0] * 256
+        llm_kwargs["max_num_tokens"] = 256
+
     llm_test_harness(
         llama_model_path, prompts, ["Z Z Z Z Z Z"],
         sampling_params=sampling_params,
diff --git a/tests/unittest/llmapi/test_llm_pytorch.py b/tests/unittest/llmapi/test_llm_pytorch.py
index c9e5328690..b1411225f0 100644
--- a/tests/unittest/llmapi/test_llm_pytorch.py
+++ b/tests/unittest/llmapi/test_llm_pytorch.py
@@ -37,8 +37,10 @@ from transformers import AutoModelForCausalLM
 
 
 @force_ampere
-def test_tinyllama_logits_processor():
-    tinyllama_logits_processor_test_harness(backend="pytorch")
+@pytest.mark.parametrize("enable_chunked_prefill", [False, True])
+def test_tinyllama_logits_processor(enable_chunked_prefill):
+    tinyllama_logits_processor_test_harness(
+        backend="pytorch", enable_chunked_prefill=enable_chunked_prefill)
 
 
 @pytest.mark.parametrize(
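
Reviewer note (not part of the patch): the harness expects the output
"Z Z Z Z Z Z" because `MyLogitsProcessor` (defined earlier in
tests/unittest/llmapi/test_llm.py, not shown in this diff) masks every
vocabulary entry except the biased word id. With chunked prefill
enabled, the prompt "A B C" * 256 exceeds max_num_tokens=256, so the
prefill runs as several chunks, and the test checks that the processor
is still applied to the logits produced by the final chunk before the
first token is sampled. Below is a minimal sketch of such a processor,
assuming the LogitsProcessor callback interface used by the LLM API
tests (req_id, logits, token_ids, stream_ptr, client_id) and that
`LogitsProcessor` is importable from `tensorrt_llm.sampling_params`;
the class name is illustrative, not the actual test code:

    from typing import List, Optional

    import torch

    from tensorrt_llm.sampling_params import LogitsProcessor


    class BiasToSingleToken(LogitsProcessor):
        """Force every sampled token to a single vocabulary id by
        masking all other logits to -inf (hypothetical stand-in for
        the harness's MyLogitsProcessor)."""

        def __init__(self, biased_word_id: int):
            self.biased_word_id = biased_word_id

        def __call__(self, req_id: int, logits: torch.Tensor,
                     token_ids: List[List[int]],
                     stream_ptr: Optional[int],
                     client_id: Optional[int]) -> None:
            # Run on the sampling stream when one is provided, so the
            # in-place edit is ordered with the engine's kernels.
            stream = (None if stream_ptr is None else
                      torch.cuda.ExternalStream(stream_ptr))
            with torch.cuda.stream(stream):
                logits[...] = float("-inf")           # suppress all tokens
                logits[..., self.biased_word_id] = 0  # keep the biased one

With max_tokens=6 and such a processor attached via SamplingParams,
every generated token decodes to the biased word, which is why the
expected output is exactly "Z Z Z Z Z Z" in both the chunked and
non-chunked parametrizations.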