From b9fe0fa7ec99ef504c42ed4d659eca496cc4a34a Mon Sep 17 00:00:00 2001
From: Leslie Fang
Date: Mon, 4 Aug 2025 13:46:07 +0800
Subject: [PATCH] [None][infra] Enable test of chunked prefill with logit post
 processor (#6483)

Signed-off-by: leslie-fang25
---
 docs/source/torch/features/feature_combination_matrix.md | 2 +-
 tests/unittest/llmapi/test_llm.py                        | 5 +++++
 tests/unittest/llmapi/test_llm_pytorch.py                | 6 ++++--
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/docs/source/torch/features/feature_combination_matrix.md b/docs/source/torch/features/feature_combination_matrix.md
index 214d37b61d..35a10a4959 100644
--- a/docs/source/torch/features/feature_combination_matrix.md
+++ b/docs/source/torch/features/feature_combination_matrix.md
@@ -14,5 +14,5 @@
 | TLLM C++ Sampler | Yes | Yes | Yes | Yes | Yes | No | No | No | No | --- | | | | |
 | KV Cache Reuse | Yes | Yes | Yes | Untested | Yes | Untested | Yes | No | Yes | Yes | --- | | | |
 | Slide Window Attention | Yes | Yes | Yes | Untested | No | Untested | Untested | Untested | Yes | Yes | WIP | --- | | |
-| Logits Post Processor | No | Yes | Yes | No | Untested | No | No | No | Yes | Yes | Yes | Yes | --- | |
+| Logits Post Processor | No | Yes | Yes | No | Yes | No | No | No | Yes | Yes | Yes | Yes | --- | |
 | Guided Decoding | Yes | Yes | Yes | No | Yes | No | No | No | Yes | Yes | Yes | Yes | Yes | --- |
diff --git a/tests/unittest/llmapi/test_llm.py b/tests/unittest/llmapi/test_llm.py
index 7f05e6e0e1..a7bc583162 100644
--- a/tests/unittest/llmapi/test_llm.py
+++ b/tests/unittest/llmapi/test_llm.py
@@ -1149,6 +1149,11 @@ def tinyllama_logits_processor_test_harness(backend=None, **llm_kwargs):
     sampling_params = SamplingParams(
         max_tokens=6, logits_processor=MyLogitsProcessor(biased_word_id))
 
+    prompts = ["A B C"]
+    if llm_kwargs.get('enable_chunked_prefill', None):
+        prompts[0] = prompts[0] * 256
+        llm_kwargs["max_num_tokens"] = 256
+
     llm_test_harness(
         llama_model_path, prompts, ["Z Z Z Z Z Z"],
         sampling_params=sampling_params,
diff --git a/tests/unittest/llmapi/test_llm_pytorch.py b/tests/unittest/llmapi/test_llm_pytorch.py
index c9e5328690..b1411225f0 100644
--- a/tests/unittest/llmapi/test_llm_pytorch.py
+++ b/tests/unittest/llmapi/test_llm_pytorch.py
@@ -37,8 +37,10 @@ from transformers import AutoModelForCausalLM
 
 
 @force_ampere
-def test_tinyllama_logits_processor():
-    tinyllama_logits_processor_test_harness(backend="pytorch")
+@pytest.mark.parametrize("enable_chunked_prefill", [False, True])
+def test_tinyllama_logits_processor(enable_chunked_prefill):
+    tinyllama_logits_processor_test_harness(
+        backend="pytorch", enable_chunked_prefill=enable_chunked_prefill)
 
 
 @pytest.mark.parametrize(
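
Reviewer note (not part of the patch): the harness expects the output
"Z Z Z Z Z Z" because `MyLogitsProcessor` (defined earlier in
tests/unittest/llmapi/test_llm.py, not shown in this diff) masks every
vocabulary entry except the biased word id. With chunked prefill
enabled, the prompt "A B C" * 256 exceeds max_num_tokens=256, so the
prefill runs as several chunks, and the test checks that the processor
is still applied to the logits produced by the final chunk before the
first token is sampled. Below is a minimal sketch of such a processor,
assuming the LogitsProcessor callback interface used by the LLM API
tests (req_id, logits, token_ids, stream_ptr, client_id) and that
`LogitsProcessor` is importable from `tensorrt_llm.sampling_params`;
the class name is illustrative, not the actual test code:

    from typing import List, Optional

    import torch

    from tensorrt_llm.sampling_params import LogitsProcessor


    class BiasToSingleToken(LogitsProcessor):
        """Force every sampled token to a single vocabulary id by
        masking all other logits to -inf (hypothetical stand-in for
        the harness's MyLogitsProcessor)."""

        def __init__(self, biased_word_id: int):
            self.biased_word_id = biased_word_id

        def __call__(self, req_id: int, logits: torch.Tensor,
                     token_ids: List[List[int]],
                     stream_ptr: Optional[int],
                     client_id: Optional[int]) -> None:
            # Run on the sampling stream when one is provided, so the
            # in-place edit is ordered with the engine's kernels.
            stream = (None if stream_ptr is None else
                      torch.cuda.ExternalStream(stream_ptr))
            with torch.cuda.stream(stream):
                logits[...] = float("-inf")           # suppress all tokens
                logits[..., self.biased_word_id] = 0  # keep the biased one

With max_tokens=6 and such a processor attached via SamplingParams,
every generated token decodes to the biased word, which is why the
expected output is exactly "Z Z Z Z Z Z" in both the chunked and
non-chunked parametrizations.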