chore: cutlass cleanup (#3165)

Signed-off-by: Yuan Tong <13075180+tongyuantongyu@users.noreply.github.com>
2026-01-14 06:27:45 +08:00 · 2025-04-01 13:57:38 +08:00 · 2025-04-01 13:57:38 +08:00 · 2994527110
commit 2994527110
parent 22ff81b047
5 changed files with 32 additions and 78 deletions
--- a/cpp/tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h
+++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h
@ -17,14 +17,18 @@
 #pragma once
 #include <NvInferRuntime.h>
-#include <cuda_bf16.h>
+
 #include "cutlass/half.h"
 #include <cuda_fp16.h>
 #include "cutlass/bfloat16.h"
 #include <cuda_bf16.h>
 #include "cutlass/float8.h"
 #include <cuda_fp8.h>
-#include "cutlass/bfloat16.h"
+#include "cutlass/float_subbyte.h"
-#include "cutlass/float8.h"
+#include <cuda_fp4.h>
 #include "cutlass/half.h"
 namespace tensorrt_llm
 {
@ -59,6 +63,12 @@ struct CutlassType<nvinfer1::DataType::kFP8>
    using type = cutlass::float_e4m3_t;
 };
 // Specialization of CutlassType for nvinfer1::DataType::kFP4: the nested
 // `type` alias resolves to cutlass::float_e2m1_t.
 // This specialization is not wrapped in an ENABLE_FP4 guard; it names only
 // the TensorRT enum value and the CUTLASS type, not __nv_fp4_e2m1.
 template <>
 struct CutlassType<nvinfer1::DataType::kFP4>
 {
    using type = cutlass::float_e2m1_t;
 };
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 // Tllm to Cutlass
@ -96,6 +106,14 @@ struct TllmToCutlassTypeAdapter<__nv_fp8_e5m2>
 };
 #endif
 // Compiled only when ENABLE_FP4 is defined, because the specialization below
 // names the CUDA type __nv_fp4_e2m1.
 #if defined(ENABLE_FP4)
 // TllmToCutlassTypeAdapter specialization for __nv_fp4_e2m1: the nested
 // `type` alias resolves to cutlass::float_e2m1_t.
 template <>
 struct TllmToCutlassTypeAdapter<__nv_fp4_e2m1>
 {
    using type = cutlass::float_e2m1_t;
 };
 #endif
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 // Cutlass to Tllm
@ -133,6 +151,14 @@ struct CutlassToTllmTypeAdapter<cutlass::float_e5m2_t>
 };
 #endif
 // Compiled only when ENABLE_FP4 is defined, because the alias below names the
 // CUDA type __nv_fp4_e2m1.
 #if defined(ENABLE_FP4)
 // CutlassToTllmTypeAdapter specialization for cutlass::float_e2m1_t: the
 // nested `type` alias resolves to __nv_fp4_e2m1 (the reverse direction of the
 // TllmToCutlassTypeAdapter<__nv_fp4_e2m1> mapping).
 template <>
 struct CutlassToTllmTypeAdapter<cutlass::float_e2m1_t>
 {
    using type = __nv_fp4_e2m1;
 };
 #endif
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 } // namespace cutlass_kernels
--- a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h
+++ b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h
@ -1,65 +0,0 @@
 /*
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #pragma once
 #include "tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h"
 #ifdef ENABLE_FP4
 #include <cuda_fp4.h>
 #endif
 // Forward-declare the CUTLASS types below so that this header does not have
 // to include the CUTLASS headers that define them.
 // Of the three names, only float_e2m1_t is referenced by the specializations
 // visible in this header.
 namespace cutlass
 {
 // FP4 and FP6 types
 struct float_e2m1_t;
 struct float_e3m2_t;
 struct float_ue4m3_t;
 } // namespace cutlass
 namespace tensorrt_llm
 {
 namespace kernels
 {
 namespace cutlass_kernels
 {
 // Compiled only when ENABLE_FP4 is defined, because the specialization below
 // names the CUDA type __nv_fp4_e2m1.
 #if defined(ENABLE_FP4)
 // TllmToCutlassTypeAdapter specialization for __nv_fp4_e2m1: the nested
 // `type` alias resolves to cutlass::float_e2m1_t.
 template <>
 struct TllmToCutlassTypeAdapter<__nv_fp4_e2m1>
 {
    using type = cutlass::float_e2m1_t;
 };
 #endif
 // Compiled only when ENABLE_FP4 is defined, because the alias below names the
 // CUDA type __nv_fp4_e2m1.
 #if defined(ENABLE_FP4)
 // CutlassToTllmTypeAdapter specialization for cutlass::float_e2m1_t: the
 // nested `type` alias resolves to __nv_fp4_e2m1.
 template <>
 struct CutlassToTllmTypeAdapter<cutlass::float_e2m1_t>
 {
    using type = __nv_fp4_e2m1;
 };
 #endif
 // Compiled only when ENABLE_FP4 is defined.
 #if defined(ENABLE_FP4)
 // CutlassType specialization for nvinfer1::DataType::kFP4: the nested `type`
 // alias resolves to cutlass::float_e2m1_t.
 template <>
 struct CutlassType<nvinfer1::DataType::kFP4>
 {
    using type = cutlass::float_e2m1_t;
 };
 #endif
 } // namespace cutlass_kernels
 } // namespace kernels
 } // namespace tensorrt_llm
--- a/cpp/tensorrt_llm/plugins/gemmAllReducePlugin/gemmAllReducePlugin.cpp
+++ b/cpp/tensorrt_llm/plugins/gemmAllReducePlugin/gemmAllReducePlugin.cpp
@ -16,7 +16,7 @@
 */
 #include "gemmAllReducePlugin.h"
 #include "tensorrt_llm/common/assert.h"
-#include "tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h"
+#include "tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h"
 #include "tensorrt_llm/plugins/common/pluginUtils.h"
 #include <unistd.h>
--- a/jenkins/L0_Test.groovy
+++ b/jenkins/L0_Test.groovy
@ -846,7 +846,7 @@ def runLLMBuildFromPackage(pipeline, cpu_arch, reinstall_dependencies=false, whe
        # Folders and their allowed files
        declare -A ALLOWED=(
-            ["./tensorrt_llm/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src"]="internal_cutlass_type_conversion.h"
+            ["./tensorrt_llm/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src"]=""
            ["./tensorrt_llm/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/src"]=""
        )
--- a/jenkins/license_cpp.json
+++ b/jenkins/license_cpp.json
@ -20,19 +20,12 @@
            ],
            "license_name": "apache"
        },
        {
            "include_re": "^tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h",
            "license_name": "apache"
        },
        {
            "include_re": "^tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/src/",
            "license_name": "nvidia"
        },
        {
            "include_re": "^tensorrt_llm/kernels/internal_cutlass_kernels/src/",
            "exclude_re": [
                "^tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h"
            ],
            "license_name": "nvidia"
        }
    ],