[https://nvbugs/5598674][fix] enable partial reuse in gemma and gpt oss test (#10559)

Signed-off-by: Chuang Zhu <111838961+chuangz0@users.noreply.github.com>
Author: Chuang Zhu
Date: 2026-01-16 10:26:15 +08:00 (committed by GitHub)
Parent: e3f27e06c7
Commit: 7e2cbc0756
2 changed files with 5 additions and 7 deletions


@@ -1037,12 +1037,12 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness):
 ctx_server_config["kv_cache_config"] = {
     "max_attention_window": [512, 512, 512, 512, 512, 32768],
     "enable_block_reuse": block_reuse,
-    "enable_partial_reuse": False,
+    "enable_partial_reuse": block_reuse,
 }
 gen_server_config["kv_cache_config"] = {
     "max_attention_window": [512, 512, 512, 512, 512, 32768],
     "enable_block_reuse": block_reuse,
-    "enable_partial_reuse": False,
+    "enable_partial_reuse": block_reuse,
 }
 disaggregated_server_config = {
     "hostname": "localhost",
@@ -1066,7 +1066,7 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness):
     task.evaluate(llm)


-@skip_pre_hopper
+@skip_pre_blackwell
 @pytest.mark.skip_less_device_memory(80000)
 class TestGPTOSS(LlmapiAccuracyTestHarness):
     extra_evaluator_kwargs = {
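
The decorator lines above gate the whole GPT-OSS class by GPU architecture and free device memory. As a rough, hedged sketch only (the real skip_pre_hopper / skip_pre_blackwell helpers live in the test suite's shared utilities and may be implemented differently), such a marker is typically a pytest.mark.skipif keyed on the CUDA compute capability:

    import pytest
    import torch

    def _has_min_compute_major(required_major: int) -> bool:
        # Skip markers of this style usually check the device's SM major version.
        if not torch.cuda.is_available():
            return False
        major, _ = torch.cuda.get_device_capability()
        return major >= required_major

    # Blackwell GPUs report SM 10.x, Hopper reports SM 9.x.
    skip_pre_blackwell = pytest.mark.skipif(
        not _has_min_compute_major(10),
        reason="requires Blackwell (SM 10.x) or newer",
    )
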
@@ -1099,13 +1099,13 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
 ctx_server_config["kv_cache_config"] = {
     "max_attention_window": [128, 32768],
     "enable_block_reuse": block_reuse,
-    "enable_partial_reuse": False,
+    "enable_partial_reuse": block_reuse,
     "free_gpu_memory_fraction": 0.5,
 }
 gen_server_config["kv_cache_config"] = {
     "max_attention_window": [128, 32768],
     "enable_block_reuse": block_reuse,
-    "enable_partial_reuse": False,
+    "enable_partial_reuse": block_reuse,
     "free_gpu_memory_fraction": 0.5,
 }
 disaggregated_server_config = {
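
In both test classes the change is the same: enable_partial_reuse was hard-coded to False and is now tied to the parametrized block_reuse flag, so the partial-reuse path is exercised whenever block reuse is. A minimal sketch of that intent, assuming these dict keys map onto the LLM API's KvCacheConfig (the import path and helper name below are illustrative, not part of the diff):

    from tensorrt_llm.llmapi import KvCacheConfig  # assumed import path

    def make_kv_cache_config(block_reuse: bool) -> KvCacheConfig:
        # Partial reuse now follows the same flag as block reuse instead of
        # being forced off; the values mirror the GPT-OSS hunk above.
        return KvCacheConfig(
            max_attention_window=[128, 32768],
            enable_block_reuse=block_reuse,
            enable_partial_reuse=block_reuse,
            free_gpu_memory_fraction=0.5,
        )
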


@@ -308,8 +308,6 @@ full:sm89/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_
 unittest/_torch/auto_deploy/unit/multigpu/transformations/library/test_tp_sharding.py::test_sharding[GQA_Block-torch_dist_all_reduce-True-False-2] SKIP (https://nvbugs/5766982)
 accuracy/test_cli_flow.py::TestVicuna7B::test_eagle_2[cuda_graph=True-chunked_context=True] SKIP (https://nvbugs/5773185)
 accuracy/test_cli_flow.py::TestVicuna7B::test_eagle_2[cuda_graph=True-chunked_context=False] SKIP (https://nvbugs/5773185)
-accuracy/test_disaggregated_serving.py::TestGPTOSS::test_auto_dtype[True] SKIP (https://nvbugs/5596343)
-accuracy/test_disaggregated_serving.py::TestGPTOSS::test_auto_dtype[False] SKIP (https://nvbugs/5596343)
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=False-torch_compile=True] SKIP (https://nvbugs/5775326)
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4[torch_compile=False] SKIP (https://nvbugs/5794796)
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4[torch_compile=False] SKIP (https://nvbugs/5794796)
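
Removing the two waiver entries (tracked under https://nvbugs/5596343) puts both parametrizations of TestGPTOSS::test_auto_dtype back into the disaggregated-serving run. For a local spot check one could invoke just those node ids, for example (paths assumed relative to the integration test directory):

    import pytest

    # Illustrative only: run the two re-enabled parametrizations by node id.
    if __name__ == "__main__":
        raise SystemExit(pytest.main([
            "accuracy/test_disaggregated_serving.py::TestGPTOSS::test_auto_dtype[True]",
            "accuracy/test_disaggregated_serving.py::TestGPTOSS::test_auto_dtype[False]",
        ]))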