commit f97a1aaa96 (parent eea8f0b182)

minor: consolidate the per-test pytest.skip calls in TestStarAttention into a single class-level skip (nvbug 5781389)

Signed-off-by: yuhangh <58161490+heyuhhh@users.noreply.github.com>
@@ -68,6 +68,7 @@ class CUDAGraphTestScenario:
         return f"batch_size:{self.batch_size}-num_heads:{self.num_heads}-num_kv_heads:{num_kv_heads_str}-head_dim:{self.head_dim}-anchor_size:{self.anchor_size}-block_size:{self.block_size}-dtype:{self.dtype}"
 
 
+@pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5781389")
 class TestStarAttention(unittest.TestCase):
 
     @parameterized.expand([
@@ -116,7 +117,6 @@ class TestStarAttention(unittest.TestCase):
     ], lambda testcase_func, param_num, param:
         f"{testcase_func.__name__}[{param.args[0]}]")
     def test_flashinfer_star_attention(self, scenario: Scenario):
-        pytest.skip("https://nvbugspro.nvidia.com/bug/5781389")
         num_layers = scenario.num_layers
         num_heads = scenario.num_heads
         num_kv_heads = scenario.num_kv_heads
@@ -554,7 +554,6 @@ class TestStarAttention(unittest.TestCase):
                           skip_on_empty=True)
     def test_attention_with_cuda_graphs(
             self, test_scenario: CUDAGraphTestScenario) -> None:
-        pytest.skip("https://nvbugspro.nvidia.com/bug/5781389")
         # This test exercises our CUDAGraph metadata class and makes sure
         # that the flashinfer attention layer is compatible with graph capture/replay.
         # We compare the CUDA graph results to the results without CUDA graph.
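For context: the change above swaps two runtime pytest.skip() calls for a single collection-time @pytest.mark.skip on the class. An inline pytest.skip() starts running the test and then aborts it, while the class-level marker reports every test in the class as skipped without executing any setup or body. A minimal sketch of the two styles (the test bodies here are placeholders, not the repository's code):

import pytest

# Runtime skip: the body begins executing and bails out at the call.
def test_inline_skip():
    pytest.skip("https://nvbugspro.nvidia.com/bug/5781389")
    assert False  # never reached

# Collection-time skip: every test in the class is reported as skipped
# without running fixtures, setup, or bodies; this is the style the
# commit adopts.
@pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5781389")
class TestSkippedSuite:
    def test_a(self):
        assert False  # never runs

    def test_b(self):
        assert False  # never runs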
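The comment block on test_attention_with_cuda_graphs describes the usual capture/replay-versus-eager comparison. As a rough illustration of that technique with stock PyTorch CUDA graph APIs, using a stand-in torch.nn.Linear module rather than the flashinfer attention layer (a sketch of the general pattern, not the repository's test):

import torch

@torch.no_grad()
def check_graph_matches_eager():
    # Stand-in module; the real test exercises the flashinfer attention layer.
    model = torch.nn.Linear(64, 64).cuda()
    static_in = torch.randn(8, 64, device="cuda")

    # Warm up on a side stream before capture, as the PyTorch docs recommend.
    side = torch.cuda.Stream()
    side.wait_stream(torch.cuda.current_stream())
    with torch.cuda.stream(side):
        for _ in range(3):
            model(static_in)
    torch.cuda.current_stream().wait_stream(side)

    # Capture one forward pass into a CUDA graph.
    graph = torch.cuda.CUDAGraph()
    with torch.cuda.graph(graph):
        static_out = model(static_in)

    # Replay with fresh data: copy into the captured input buffer, replay,
    # and compare against an eager (non-graph) forward pass on the same data.
    new_in = torch.randn(8, 64, device="cuda")
    static_in.copy_(new_in)
    graph.replay()
    torch.testing.assert_close(static_out, model(new_in))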