mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
chore: cutlass cleanup (#3165)
Signed-off-by: Yuan Tong <13075180+tongyuantongyu@users.noreply.github.com>
This commit is contained in:
parent
22ff81b047
commit
2994527110
@ -17,14 +17,18 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <NvInferRuntime.h>
|
#include <NvInferRuntime.h>
|
||||||
#include <cuda_bf16.h>
|
|
||||||
|
#include "cutlass/half.h"
|
||||||
#include <cuda_fp16.h>
|
#include <cuda_fp16.h>
|
||||||
|
|
||||||
|
#include "cutlass/bfloat16.h"
|
||||||
|
#include <cuda_bf16.h>
|
||||||
|
|
||||||
|
#include "cutlass/float8.h"
|
||||||
#include <cuda_fp8.h>
|
#include <cuda_fp8.h>
|
||||||
|
|
||||||
#include "cutlass/bfloat16.h"
|
#include "cutlass/float_subbyte.h"
|
||||||
#include "cutlass/float8.h"
|
#include <cuda_fp4.h>
|
||||||
#include "cutlass/half.h"
|
|
||||||
|
|
||||||
namespace tensorrt_llm
|
namespace tensorrt_llm
|
||||||
{
|
{
|
||||||
@ -59,6 +63,12 @@ struct CutlassType<nvinfer1::DataType::kFP8>
|
|||||||
using type = cutlass::float_e4m3_t;
|
using type = cutlass::float_e4m3_t;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct CutlassType<nvinfer1::DataType::kFP4>
|
||||||
|
{
|
||||||
|
using type = cutlass::float_e2m1_t;
|
||||||
|
};
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Tllm to Cutlass
|
// Tllm to Cutlass
|
||||||
|
|
||||||
@ -96,6 +106,14 @@ struct TllmToCutlassTypeAdapter<__nv_fp8_e5m2>
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(ENABLE_FP4)
|
||||||
|
template <>
|
||||||
|
struct TllmToCutlassTypeAdapter<__nv_fp4_e2m1>
|
||||||
|
{
|
||||||
|
using type = cutlass::float_e2m1_t;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Cutlass to Tllm
|
// Cutlass to Tllm
|
||||||
|
|
||||||
@ -133,6 +151,14 @@ struct CutlassToTllmTypeAdapter<cutlass::float_e5m2_t>
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(ENABLE_FP4)
|
||||||
|
template <>
|
||||||
|
struct CutlassToTllmTypeAdapter<cutlass::float_e2m1_t>
|
||||||
|
{
|
||||||
|
using type = __nv_fp4_e2m1;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
} // namespace cutlass_kernels
|
} // namespace cutlass_kernels
|
||||||
|
|||||||
@ -1,65 +0,0 @@
|
|||||||
/*
|
|
||||||
* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
#include "tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h"
|
|
||||||
|
|
||||||
#ifdef ENABLE_FP4
|
|
||||||
#include <cuda_fp4.h>
|
|
||||||
#endif
|
|
||||||
// We forward declare so we don't have to pull in a million cutlass includes
|
|
||||||
namespace cutlass
|
|
||||||
{
|
|
||||||
// FP4 and FP6 types
|
|
||||||
struct float_e2m1_t;
|
|
||||||
struct float_e3m2_t;
|
|
||||||
struct float_ue4m3_t;
|
|
||||||
} // namespace cutlass
|
|
||||||
|
|
||||||
namespace tensorrt_llm
|
|
||||||
{
|
|
||||||
namespace kernels
|
|
||||||
{
|
|
||||||
namespace cutlass_kernels
|
|
||||||
{
|
|
||||||
#if defined(ENABLE_FP4)
|
|
||||||
template <>
|
|
||||||
struct TllmToCutlassTypeAdapter<__nv_fp4_e2m1>
|
|
||||||
{
|
|
||||||
using type = cutlass::float_e2m1_t;
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(ENABLE_FP4)
|
|
||||||
template <>
|
|
||||||
struct CutlassToTllmTypeAdapter<cutlass::float_e2m1_t>
|
|
||||||
{
|
|
||||||
using type = __nv_fp4_e2m1;
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(ENABLE_FP4)
|
|
||||||
template <>
|
|
||||||
struct CutlassType<nvinfer1::DataType::kFP4>
|
|
||||||
{
|
|
||||||
using type = cutlass::float_e2m1_t;
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
} // namespace cutlass_kernels
|
|
||||||
} // namespace kernels
|
|
||||||
} // namespace tensorrt_llm
|
|
||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#include "gemmAllReducePlugin.h"
|
#include "gemmAllReducePlugin.h"
|
||||||
#include "tensorrt_llm/common/assert.h"
|
#include "tensorrt_llm/common/assert.h"
|
||||||
#include "tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h"
|
#include "tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h"
|
||||||
#include "tensorrt_llm/plugins/common/pluginUtils.h"
|
#include "tensorrt_llm/plugins/common/pluginUtils.h"
|
||||||
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|||||||
@ -846,7 +846,7 @@ def runLLMBuildFromPackage(pipeline, cpu_arch, reinstall_dependencies=false, whe
|
|||||||
|
|
||||||
# Folders and their allowed files
|
# Folders and their allowed files
|
||||||
declare -A ALLOWED=(
|
declare -A ALLOWED=(
|
||||||
["./tensorrt_llm/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src"]="internal_cutlass_type_conversion.h"
|
["./tensorrt_llm/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src"]=""
|
||||||
["./tensorrt_llm/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/src"]=""
|
["./tensorrt_llm/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/src"]=""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -20,19 +20,12 @@
|
|||||||
],
|
],
|
||||||
"license_name": "apache"
|
"license_name": "apache"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"include_re": "^tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h",
|
|
||||||
"license_name": "apache"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"include_re": "^tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/src/",
|
"include_re": "^tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/src/",
|
||||||
"license_name": "nvidia"
|
"license_name": "nvidia"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"include_re": "^tensorrt_llm/kernels/internal_cutlass_kernels/src/",
|
"include_re": "^tensorrt_llm/kernels/internal_cutlass_kernels/src/",
|
||||||
"exclude_re": [
|
|
||||||
"^tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h"
|
|
||||||
],
|
|
||||||
"license_name": "nvidia"
|
"license_name": "nvidia"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user