TensorRT-LLMs/cpp/kernels/fmha_v2/test/fmha/test_meta.py
Kanghwan 41e5870a70
[#8476][chore] Update license (#8807)
Signed-off-by: Kanghwan Jang <861393+karljang@users.noreply.github.com>
2025-11-19 15:05:25 -08:00

81 lines
2.6 KiB
Python

# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# test of tests
import pytest
from filter_rules import *
from utils import *
fmha_exe_path = 'bin/fmha.exe'
# debug
@pytest.mark.debug
@pytest.mark.parametrize('rules', [[]])
@pytest.mark.parametrize('fmha_arg', [FmhaArgs()])
def test_fmha_single_case(fmha_arg, rules, dryrun, disable_rules,
gpu_compute_cap):
fmha_harness(fmha_exe_path,
fmha_arg,
rules,
dryrun,
gpu_compute_cap=gpu_compute_cap)
# debug
@pytest.mark.debug
@apply_rule(corner_case_rule)
@pytest.mark.parametrize('rules', [[]])
@pytest.mark.parametrize('fmha_arg',
[FmhaArgs()._replace(seq_len=16, head_dim=64)])
def test_corner_case_rule(fmha_arg, rules, dryrun, disable_rules,
gpu_compute_cap):
fmha_harness(fmha_exe_path,
fmha_arg,
rules,
dryrun,
gpu_compute_cap=gpu_compute_cap)
# debug
@pytest.mark.debug
@apply_rule(compute_cap_specific_rule)
@pytest.mark.parametrize('rules', [[]])
@pytest.mark.parametrize('fmha_arg', [FmhaArgs()])
def test_compute_cap_specific_rule(fmha_arg, rules, dryrun, disable_rules,
mock_gpu_compute_cap):
fmha_harness(fmha_exe_path,
fmha_arg,
rules,
dryrun,
gpu_compute_cap=mock_gpu_compute_cap)
# debug; try running large seq and large head size without error tolerance
@pytest.mark.debug
@apply_rule(sm80_only)
@pytest.mark.parametrize('rules', [[]])
@pytest.mark.parametrize('fmha_arg',
[FmhaArgs()._replace(seq_len=4096, head_dim=256)])
@pytest.mark.xfail
def test_controlled_validation_fail(fmha_arg, rules, dryrun, disable_rules,
gpu_compute_cap):
fmha_harness(fmha_exe_path,
fmha_arg,
rules,
dryrun,
gpu_compute_cap=gpu_compute_cap)