[TRTLLM-6222][feat] Extend cute_dsl_nvfp4_gemm to sm103. (#9543)

Signed-off-by: Mindy Li <11663212+limin2021@users.noreply.github.com>
This commit is contained in:
Li Min 2025-12-01 10:19:36 +08:00 committed by GitHub
parent 34e2fa5c96
commit 1797e91dfd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 6 additions and 6 deletions

View File

@@ -25250,7 +25250,7 @@ License: `NVIDIA Proprietary Software`
 - `Homepage`: https://developer.nvidia.com/cusparselt
-## nvidia-cutlass-dsl (4.3.0)
+## nvidia-cutlass-dsl (4.3.1)
 ### Licenses
 License: `None`

View File

@@ -69,7 +69,7 @@ triton==3.5.0; platform_machine == "x86_64"
 tiktoken
 blobfile
 openai-harmony==0.0.4
-nvidia-cutlass-dsl==4.3.0; python_version >= "3.10"
+nvidia-cutlass-dsl==4.3.1; python_version >= "3.10"
 plotly
 numexpr<2.14.0 # WAR for attempted use of nonexistent numpy.typing
 partial_json_parser

View File

@@ -49,9 +49,9 @@ if IS_CUTLASS_DSL_AVAILABLE:
 self.output_dtype = output_dtype
 assert output_dtype == torch.bfloat16
-if get_sm_version() != 100:
+if get_sm_version() not in [100, 103]:
 raise ValueError(
-f"SM version {get_sm_version()} is not supported for {self.__class__.__name__}, it only supports SM 100"
+f"SM version {get_sm_version()} is not supported for {self.__class__.__name__}, it only supports SM 100 and SM 103"
 )
 # rewrite the hash function because the value of self.alpha doesn't affect the tactic.

View File

@@ -87,8 +87,8 @@ def test_fp4_linear(dtype, mnk):
 @pytest.mark.skipif(sys.version_info < (3, 12),
 reason="cutlass-dsl 4.1.0 requires Python 3.12+")
 @pytest.mark.skipif(
-get_sm_version() != 100,
-reason="This test is only supported in Blackwell architecture",
+get_sm_version() not in [100, 103],
+reason="This test is only supported in sm100 and sm103 architecture",
 )
 @pytest.mark.skipif(not IS_CUTLASS_DSL_AVAILABLE,
 reason="cutlass-dsl is not available")