diff --git a/tests/integration/defs/accuracy/test_disaggregated_serving.py b/tests/integration/defs/accuracy/test_disaggregated_serving.py index 4b9b2a71f9..ba78064b52 100644 --- a/tests/integration/defs/accuracy/test_disaggregated_serving.py +++ b/tests/integration/defs/accuracy/test_disaggregated_serving.py @@ -1037,12 +1037,12 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness): ctx_server_config["kv_cache_config"] = { "max_attention_window": [512, 512, 512, 512, 512, 32768], "enable_block_reuse": block_reuse, - "enable_partial_reuse": False, + "enable_partial_reuse": block_reuse, } gen_server_config["kv_cache_config"] = { "max_attention_window": [512, 512, 512, 512, 512, 32768], "enable_block_reuse": block_reuse, - "enable_partial_reuse": False, + "enable_partial_reuse": block_reuse, } disaggregated_server_config = { "hostname": "localhost", @@ -1066,7 +1066,7 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness): task.evaluate(llm) -@skip_pre_hopper +@skip_pre_blackwell @pytest.mark.skip_less_device_memory(80000) class TestGPTOSS(LlmapiAccuracyTestHarness): extra_evaluator_kwargs = { @@ -1099,13 +1099,13 @@ class TestGPTOSS(LlmapiAccuracyTestHarness): ctx_server_config["kv_cache_config"] = { "max_attention_window": [128, 32768], "enable_block_reuse": block_reuse, - "enable_partial_reuse": False, + "enable_partial_reuse": block_reuse, "free_gpu_memory_fraction": 0.5, } gen_server_config["kv_cache_config"] = { "max_attention_window": [128, 32768], "enable_block_reuse": block_reuse, - "enable_partial_reuse": False, + "enable_partial_reuse": block_reuse, "free_gpu_memory_fraction": 0.5, } disaggregated_server_config = { diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index 9fcd2991e1..3eadf74533 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -308,8 +308,6 @@ full:sm89/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ unittest/_torch/auto_deploy/unit/multigpu/transformations/library/test_tp_sharding.py::test_sharding[GQA_Block-torch_dist_all_reduce-True-False-2] SKIP (https://nvbugs/5766982) accuracy/test_cli_flow.py::TestVicuna7B::test_eagle_2[cuda_graph=True-chunked_context=True] SKIP (https://nvbugs/5773185) accuracy/test_cli_flow.py::TestVicuna7B::test_eagle_2[cuda_graph=True-chunked_context=False] SKIP (https://nvbugs/5773185) -accuracy/test_disaggregated_serving.py::TestGPTOSS::test_auto_dtype[True] SKIP (https://nvbugs/5596343) -accuracy/test_disaggregated_serving.py::TestGPTOSS::test_auto_dtype[False] SKIP (https://nvbugs/5596343) accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=False-torch_compile=True] SKIP (https://nvbugs/5775326) accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4[torch_compile=False] SKIP (https://nvbugs/5794796) accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4[torch_compile=False] SKIP (https://nvbugs/5794796)