Signed-off-by: yuhangh <58161490+heyuhhh@users.noreply.github.com>
yuhangh 2026-01-06 07:12:13 +00:00
parent eea8f0b182
commit f97a1aaa96


@@ -68,6 +68,7 @@ class CUDAGraphTestScenario:
         return f"batch_size:{self.batch_size}-num_heads:{self.num_heads}-num_kv_heads:{num_kv_heads_str}-head_dim:{self.head_dim}-anchor_size:{self.anchor_size}-block_size:{self.block_size}-dtype:{self.dtype}"
 
 
+@pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5781389")
 class TestStarAttention(unittest.TestCase):
 
     @parameterized.expand([
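For context, a minimal sketch of the pattern this change adopts (module and test names here are hypothetical, not from this repo): a single class-level @pytest.mark.skip decorator skips every test in the class at collection time, which is what makes the per-test pytest.skip(...) calls removed in the hunks below redundant.

import unittest

import pytest


@pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5781389")
class TestExample(unittest.TestCase):

    def test_one(self):
        # Never executes: the class-level marker skips it during collection.
        self.assertTrue(True)

    def test_two(self):
        # Previously each test body carried its own runtime skip, e.g.:
        # pytest.skip("https://nvbugspro.nvidia.com/bug/5781389")
        self.assertEqual(1 + 1, 2)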
@@ -116,7 +117,6 @@ class TestStarAttention(unittest.TestCase):
     ], lambda testcase_func, param_num, param:
         f"{testcase_func.__name__}[{param.args[0]}]")
     def test_flashinfer_star_attention(self, scenario: Scenario):
-        pytest.skip("https://nvbugspro.nvidia.com/bug/5781389")
         num_layers = scenario.num_layers
         num_heads = scenario.num_heads
         num_kv_heads = scenario.num_kv_heads
@@ -554,7 +554,6 @@ class TestStarAttention(unittest.TestCase):
                           skip_on_empty=True)
     def test_attention_with_cuda_graphs(
             self, test_scenario: CUDAGraphTestScenario) -> None:
-        pytest.skip("https://nvbugspro.nvidia.com/bug/5781389")
         # This test exercises our CUDAGraph metadata class and makes sure
         # that the flashinfer attention layer is compatible with graph capture/replay.
         # We compare the CUDA graph results to the results without CUDA graph.
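The comment above describes comparing CUDA-graph replay output against the eager result. A minimal, generic sketch of that capture/replay-then-compare pattern using the public torch.cuda.CUDAGraph API follows; the forward function and tensor shapes are illustrative stand-ins, not the attention layer under test.

import torch


def forward(x: torch.Tensor) -> torch.Tensor:
    # Stand-in for the layer under test (illustrative only).
    return torch.relu(x) * 2.0


# Warm up on a side stream before capture, as required by CUDA graphs.
static_x = torch.randn(8, 16, device="cuda")
warmup_stream = torch.cuda.Stream()
warmup_stream.wait_stream(torch.cuda.current_stream())
with torch.cuda.stream(warmup_stream):
    forward(static_x)
torch.cuda.current_stream().wait_stream(warmup_stream)

# Capture the forward pass; inputs and outputs stay bound to static tensors.
graph = torch.cuda.CUDAGraph()
with torch.cuda.graph(graph):
    static_y = forward(static_x)

# Replay with fresh data copied into the captured input tensor, then compare
# against the eager (non-graph) result.
new_x = torch.randn(8, 16, device="cuda")
static_x.copy_(new_x)
graph.replay()
torch.testing.assert_close(static_y, forward(new_x))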