chore: cutlass cleanup (#3165)

Signed-off-by: Yuan Tong <13075180+tongyuantongyu@users.noreply.github.com>
This commit is contained in:
Yuan Tong 2025-04-01 13:57:38 +08:00 committed by GitHub
parent 22ff81b047
commit 2994527110
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 32 additions and 78 deletions

View File

@ -17,14 +17,18 @@
#pragma once #pragma once
#include <NvInferRuntime.h> #include <NvInferRuntime.h>
#include <cuda_bf16.h>
#include "cutlass/half.h"
#include <cuda_fp16.h> #include <cuda_fp16.h>
#include "cutlass/bfloat16.h"
#include <cuda_bf16.h>
#include "cutlass/float8.h"
#include <cuda_fp8.h> #include <cuda_fp8.h>
#include "cutlass/bfloat16.h" #include "cutlass/float_subbyte.h"
#include "cutlass/float8.h" #include <cuda_fp4.h>
#include "cutlass/half.h"
namespace tensorrt_llm namespace tensorrt_llm
{ {
@ -59,6 +63,12 @@ struct CutlassType<nvinfer1::DataType::kFP8>
using type = cutlass::float_e4m3_t; using type = cutlass::float_e4m3_t;
}; };
// Maps the TensorRT data-type tag nvinfer1::DataType::kFP4 to the CUTLASS
// element type cutlass::float_e2m1_t, exposed through the nested `type` alias.
// NOTE(review): unlike the two FP4 adapter specializations further down, this
// specialization is not wrapped in `#if defined(ENABLE_FP4)` - confirm that
// compiling it unconditionally is intended.
template <>
struct CutlassType<nvinfer1::DataType::kFP4>
{
using type = cutlass::float_e2m1_t;
};
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
// Tllm to Cutlass // Tllm to Cutlass
@ -96,6 +106,14 @@ struct TllmToCutlassTypeAdapter<__nv_fp8_e5m2>
}; };
#endif #endif
// FP4 entry of the "Tllm to Cutlass" adapters: translates the CUDA type
// __nv_fp4_e2m1 into cutlass::float_e2m1_t through the nested `type` alias.
// Compiled only when the build defines ENABLE_FP4.
#if defined(ENABLE_FP4)
template <>
struct TllmToCutlassTypeAdapter<__nv_fp4_e2m1>
{
using type = cutlass::float_e2m1_t;
};
#endif
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
// Cutlass to Tllm // Cutlass to Tllm
@ -133,6 +151,14 @@ struct CutlassToTllmTypeAdapter<cutlass::float_e5m2_t>
}; };
#endif #endif
// FP4 entry of the "Cutlass to Tllm" adapters: translates
// cutlass::float_e2m1_t back into the CUDA type __nv_fp4_e2m1 through the
// nested `type` alias. Compiled only when the build defines ENABLE_FP4.
#if defined(ENABLE_FP4)
template <>
struct CutlassToTllmTypeAdapter<cutlass::float_e2m1_t>
{
using type = __nv_fp4_e2m1;
};
#endif
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
} // namespace cutlass_kernels } // namespace cutlass_kernels

View File

@ -1,65 +0,0 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h"
#ifdef ENABLE_FP4
#include <cuda_fp4.h>
#endif
// Forward declarations only: naming these CUTLASS types here lets this header
// refer to them without pulling in a large number of CUTLASS includes.
namespace cutlass
{
// FP4 and FP6 types
// NOTE(review): float_ue4m3_t is grouped under this heading as well - confirm
// the heading still describes every type listed.
struct float_e2m1_t;
struct float_e3m2_t;
struct float_ue4m3_t;
} // namespace cutlass
// FP4 specializations of the type-conversion traits. The traits are
// presumably declared in cutlass_type_conversion.h, which this header
// includes - confirm. Every specialization below is compiled only when
// ENABLE_FP4 is defined.
namespace tensorrt_llm
{
namespace kernels
{
namespace cutlass_kernels
{
// Tllm -> Cutlass: __nv_fp4_e2m1 maps to cutlass::float_e2m1_t.
#if defined(ENABLE_FP4)
template <>
struct TllmToCutlassTypeAdapter<__nv_fp4_e2m1>
{
using type = cutlass::float_e2m1_t;
};
#endif
// Cutlass -> Tllm: cutlass::float_e2m1_t maps back to __nv_fp4_e2m1.
#if defined(ENABLE_FP4)
template <>
struct CutlassToTllmTypeAdapter<cutlass::float_e2m1_t>
{
using type = __nv_fp4_e2m1;
};
#endif
// TensorRT data-type tag -> Cutlass: nvinfer1::DataType::kFP4 maps to
// cutlass::float_e2m1_t.
#if defined(ENABLE_FP4)
template <>
struct CutlassType<nvinfer1::DataType::kFP4>
{
using type = cutlass::float_e2m1_t;
};
#endif
} // namespace cutlass_kernels
} // namespace kernels
} // namespace tensorrt_llm

View File

@ -16,7 +16,7 @@
*/ */
#include "gemmAllReducePlugin.h" #include "gemmAllReducePlugin.h"
#include "tensorrt_llm/common/assert.h" #include "tensorrt_llm/common/assert.h"
#include "tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h" #include "tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h"
#include "tensorrt_llm/plugins/common/pluginUtils.h" #include "tensorrt_llm/plugins/common/pluginUtils.h"
#include <unistd.h> #include <unistd.h>

View File

@ -846,7 +846,7 @@ def runLLMBuildFromPackage(pipeline, cpu_arch, reinstall_dependencies=false, whe
# Folders and their allowed files # Folders and their allowed files
declare -A ALLOWED=( declare -A ALLOWED=(
["./tensorrt_llm/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src"]="internal_cutlass_type_conversion.h" ["./tensorrt_llm/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src"]=""
["./tensorrt_llm/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/src"]="" ["./tensorrt_llm/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/src"]=""
) )

View File

@ -20,19 +20,12 @@
], ],
"license_name": "apache" "license_name": "apache"
}, },
{
"include_re": "^tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h",
"license_name": "apache"
},
{ {
"include_re": "^tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/src/", "include_re": "^tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/src/",
"license_name": "nvidia" "license_name": "nvidia"
}, },
{ {
"include_re": "^tensorrt_llm/kernels/internal_cutlass_kernels/src/", "include_re": "^tensorrt_llm/kernels/internal_cutlass_kernels/src/",
"exclude_re": [
"^tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h"
],
"license_name": "nvidia" "license_name": "nvidia"
} }
], ],