Turn off KV block reuse.

Remove the target_sparsity_0.0 test variant from CI.

Signed-off-by: Bo Li <22713281+bobboli@users.noreply.github.com>
This commit is contained in:
Bo Li 2026-01-10 22:00:09 +08:00
parent b07c5668f5
commit b97044612f
3 changed files with 4 additions and 4 deletions

View File

@ -3902,7 +3902,8 @@ class TestQwen3_30B_A3B_Instruct_2507(LlmapiAccuracyTestHarness):
"prefill": thr_prefill,
"decode": thr_decode,
})
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75,
enable_block_reuse=False)
if get_sm_version() >= 100:
pytest.skip("https://nvbugs/5783509: Bug to be fixed on Blackwell")
@ -3936,7 +3937,8 @@ class TestQwen3_30B_A3B_Instruct_2507(LlmapiAccuracyTestHarness):
"prefill": thr_prefill,
"decode": thr_decode,
})
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75,
enable_block_reuse=False)
with LLM(self.MODEL_PATH,
attn_backend="TRTLLM",

View File

@ -55,7 +55,6 @@ l0_b200:
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[mxfp8-latency-TRTLLM]
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[mxfp8-latency-CUTLASS]
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a16_mxfp4[latency-TRTLLM]
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B_Instruct_2507::test_skip_softmax_attention[target_sparsity_0.0]
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B_Instruct_2507::test_skip_softmax_attention[target_sparsity_0.5]
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B_Instruct_2507::test_skip_softmax_attention[target_sparsity_0.9]
- accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_nvfp4[tp1-cutlass]

View File

@ -48,7 +48,6 @@ l0_dgx_h100:
# llmapi
- unittest/llmapi/test_mpi_session.py::test_llmapi_launch_multiple_tasks
# ------------- Skip softmax attention tests ---------------
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B_Instruct_2507::test_skip_softmax_attention_2gpus[target_sparsity_0.0]
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B_Instruct_2507::test_skip_softmax_attention_2gpus[target_sparsity_0.5]
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B_Instruct_2507::test_skip_softmax_attention_2gpus[target_sparsity_0.9]
- condition: