diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h b/cpp/tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h index 5403e658e0..411013aa26 100644 --- a/cpp/tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h +++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h @@ -17,14 +17,18 @@ #pragma once #include -#include + +#include "cutlass/half.h" #include +#include "cutlass/bfloat16.h" +#include + +#include "cutlass/float8.h" #include -#include "cutlass/bfloat16.h" -#include "cutlass/float8.h" -#include "cutlass/half.h" +#include "cutlass/float_subbyte.h" +#include namespace tensorrt_llm { @@ -59,6 +63,12 @@ struct CutlassType using type = cutlass::float_e4m3_t; }; +template <> +struct CutlassType +{ + using type = cutlass::float_e2m1_t; +}; + /////////////////////////////////////////////////////////////////////////////////////////////////// // Tllm to Cutlass @@ -96,6 +106,14 @@ struct TllmToCutlassTypeAdapter<__nv_fp8_e5m2> }; #endif +#if defined(ENABLE_FP4) +template <> +struct TllmToCutlassTypeAdapter<__nv_fp4_e2m1> +{ + using type = cutlass::float_e2m1_t; +}; +#endif + /////////////////////////////////////////////////////////////////////////////////////////////////// // Cutlass to Tllm @@ -133,6 +151,14 @@ struct CutlassToTllmTypeAdapter }; #endif +#if defined(ENABLE_FP4) +template <> +struct CutlassToTllmTypeAdapter +{ + using type = __nv_fp4_e2m1; +}; +#endif + /////////////////////////////////////////////////////////////////////////////////////////////////// } // namespace cutlass_kernels diff --git a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h deleted file mode 100644 index f8f0b7f07c..0000000000 --- a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once -#include "tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h" - -#ifdef ENABLE_FP4 -#include -#endif -// We forward declare so we don't have to pull in a million cutlass includes -namespace cutlass -{ -// FP4 and FP6 types -struct float_e2m1_t; -struct float_e3m2_t; -struct float_ue4m3_t; -} // namespace cutlass - -namespace tensorrt_llm -{ -namespace kernels -{ -namespace cutlass_kernels -{ -#if defined(ENABLE_FP4) -template <> -struct TllmToCutlassTypeAdapter<__nv_fp4_e2m1> -{ - using type = cutlass::float_e2m1_t; -}; -#endif - -#if defined(ENABLE_FP4) -template <> -struct CutlassToTllmTypeAdapter -{ - using type = __nv_fp4_e2m1; -}; -#endif - -#if defined(ENABLE_FP4) -template <> -struct CutlassType -{ - using type = cutlass::float_e2m1_t; -}; -#endif - -} // namespace cutlass_kernels -} // namespace kernels -} // namespace tensorrt_llm diff --git a/cpp/tensorrt_llm/plugins/gemmAllReducePlugin/gemmAllReducePlugin.cpp b/cpp/tensorrt_llm/plugins/gemmAllReducePlugin/gemmAllReducePlugin.cpp index 5274cd310b..d4a851f12f 100644 --- a/cpp/tensorrt_llm/plugins/gemmAllReducePlugin/gemmAllReducePlugin.cpp +++ b/cpp/tensorrt_llm/plugins/gemmAllReducePlugin/gemmAllReducePlugin.cpp @@ -16,7 +16,7 @@ */ #include "gemmAllReducePlugin.h" #include "tensorrt_llm/common/assert.h" -#include "tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h" +#include "tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h" #include "tensorrt_llm/plugins/common/pluginUtils.h" #include diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index c29bbb29ea..324715b5a2 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -846,7 +846,7 @@ def runLLMBuildFromPackage(pipeline, cpu_arch, reinstall_dependencies=false, whe # Folders and their allowed files declare -A ALLOWED=( - ["./tensorrt_llm/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src"]="internal_cutlass_type_conversion.h" + ["./tensorrt_llm/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src"]="" ["./tensorrt_llm/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/src"]="" ) diff --git a/jenkins/license_cpp.json b/jenkins/license_cpp.json index a9435eb22c..fe0ad02d6b 100644 --- a/jenkins/license_cpp.json +++ b/jenkins/license_cpp.json @@ -20,19 +20,12 @@ ], "license_name": "apache" }, - { - "include_re": "^tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h", - "license_name": "apache" - }, { "include_re": "^tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/src/", "license_name": "nvidia" }, { "include_re": "^tensorrt_llm/kernels/internal_cutlass_kernels/src/", - "exclude_re": [ - "^tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h" - ], "license_name": "nvidia" } ],