disable 3xfp4

Signed-off-by: Xiwen Yu <13230610+VALLIS-NERIA@users.noreply.github.com>

parent 469a38d0d8
commit 52ad4436bc
.gitmodules (vendored, 5 lines changed)
@@ -25,7 +25,4 @@
 	url = https://github.com/wjakob/nanobind
 [submodule "3rdparty/cppzmq"]
 	path = 3rdparty/cppzmq
 	url = https://github.com/zeromq/cppzmq.git
-[submodule "3rdparty/dynamic-kernel-generator"]
-	path = 3rdparty/dynamic-kernel-generator
-	url = ssh://git@gitlab-master.nvidia.com:12051/dlarch-fastkernels/dynamic-kernel-generator.git
3rdparty/dynamic-kernel-generator (vendored, 1 line changed)
@@ -1 +0,0 @@
-Subproject commit 34bfe3557372d1d2cebe3c90448b03756c6a16eb
@@ -215,8 +215,8 @@ include_directories(
   ${CUDAToolkit_INCLUDE_DIRS}/cccl
   ${CUDNN_ROOT_DIR}/include
   $<TARGET_PROPERTY:TensorRT::NvInfer,INTERFACE_INCLUDE_DIRECTORIES>
-  ${3RDPARTY_DIR}/dynamic-kernel-generator/cutlass/include
-  ${3RDPARTY_DIR}/dynamic-kernel-generator/cutlass/tools/util/include
+  ${3RDPARTY_DIR}/cutlass/include
+  ${3RDPARTY_DIR}/cutlass/tools/util/include
   ${3RDPARTY_DIR}/NVTX/include
   ${3RDPARTY_DIR}/json/include)
 if(BINDING_TYPE STREQUAL "pybind" OR BUILD_DEEP_EP)
@@ -28,7 +28,7 @@ if(NOT Python3_EXECUTABLE)
 endif()

 execute_process(
-  WORKING_DIRECTORY ${3RDPARTY_DIR}/dynamic-kernel-generator/cutlass/python/
+  WORKING_DIRECTORY ${3RDPARTY_DIR}/cutlass/python/
   COMMAND ${Python3_EXECUTABLE} setup_library.py develop --user
   RESULT_VARIABLE _CUTLASS_LIBRARY_SUCCESS)
@@ -135,7 +135,14 @@ void tma_warp_specialized_generic_moe_gemm_kernelLauncher(TmaWarpSpecializedGrou
 #ifndef COMPILE_BLACKWELL_SM103_TMA_GROUPED_GEMMS
     else if constexpr (ArchTag::kMinComputeCapability == 103)
     {
-        TLLM_THROW("Please recompile with support for blackwell by passing 103-real as an arch to build_wheel.py.");
+        static bool first_time = true;
+        if (first_time)
+        {
+            TLLM_LOG_WARNING(
+                "Falling back to sm100f version. For best performance please recompile with support for blackwell by "
+                "passing 103-real as an arch to build_wheel.py.");
+            first_time = false;
+        }
     }
 #endif
 #ifndef COMPILE_BLACKWELL_SM120_TMA_GROUPED_GEMMS
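Both launcher hunks in this commit replace a hard TLLM_THROW with a one-time warning, so builds without SM103 support fall back to the sm100f kernels instead of aborting. Below is a minimal, self-contained sketch of that warn-once pattern; launchSm100fFallback is a hypothetical stand-in for the real fallback launcher, and plain fprintf stands in for TLLM_LOG_WARNING.

#include <cstdio>

// Hypothetical stand-in for the real sm100f kernel launcher.
void launchSm100fFallback() {}

void launchWithFallback()
{
    // Function-local static: initialized exactly once, thread-safe since C++11.
    static bool first_time = true;
    if (first_time)
    {
        // The unsynchronized write means the warning could print more than once
        // under heavy concurrency, but the fallback behavior is unaffected.
        std::fprintf(stderr,
            "Falling back to sm100f version. For best performance please recompile "
            "with support for blackwell by passing 103-real as an arch to build_wheel.py.\n");
        first_time = false;
    }
    launchSm100fFallback();
}

int main()
{
    launchWithFallback(); // warns once
    launchWithFallback(); // silent on every later call
}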
@@ -344,13 +351,8 @@ using SafeBF16 = void;
     using EpilogueScheduleSM90 = cutlass::epilogue::PtrArrayNoSmemWarpSpecialized; \
                                                                                    \
     constexpr static bool Is2SM = IsBlackwell && (cute::size<0>(ClusterShape{}) % 2) == 0; \
-    using EpilogueScheduleSM100 = std::conditional_t<Is2SM, cutlass::epilogue::PtrArrayTmaWarpSpecialized2Sm, \
+    using EpilogueScheduleSM10x = std::conditional_t<Is2SM, cutlass::epilogue::PtrArrayTmaWarpSpecialized2Sm, \
         cutlass::epilogue::PtrArrayTmaWarpSpecialized1Sm>; \
-    using EpilogueScheduleSM103 \
-        = std::conditional_t<Is2SM, cutlass::epilogue::PtrArrayNoSmemWarpSpecialized2Sm, \
-            cutlass::epilogue::PtrArrayNoSmemWarpSpecialized1Sm>; \
-    using EpilogueScheduleSM10x \
-        = std::conditional_t<IsSM103FP4, EpilogueScheduleSM103, EpilogueScheduleSM100>; \
                                                                                    \
     using EpilogueScheduleSM120 = cutlass::epilogue::TmaWarpSpecialized; \
     using EpilogueScheduleBW = std::conditional_t<IsSM120, EpilogueScheduleSM120, EpilogueScheduleSM10x>; \
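The macro edit collapses the separate SM100/SM103 schedule aliases into a single EpilogueScheduleSM10x, dropping the IsSM103FP4 branch entirely. The selection mechanism itself is ordinary std::conditional_t chaining; here is a minimal sketch under hypothetical tag types (the real schedules live in cutlass::epilogue):

#include <type_traits>

// Hypothetical tag types standing in for the CUTLASS epilogue schedules.
struct TmaWarpSpecialized1Sm {};
struct TmaWarpSpecialized2Sm {};
struct TmaWarpSpecializedSM120 {};

// Compile-time schedule selection mirroring the macro: first pick the 1-SM or
// 2-SM Blackwell schedule from the cluster shape, then let an SM120 build
// override it. Every branch is resolved at compile time, so the unselected
// schedule types are never instantiated.
template <int ClusterM, bool IsSM120>
struct SelectEpilogue
{
    static constexpr bool Is2SM = (ClusterM % 2) == 0;
    using ScheduleSM10x = std::conditional_t<Is2SM, TmaWarpSpecialized2Sm, TmaWarpSpecialized1Sm>;
    using Schedule = std::conditional_t<IsSM120, TmaWarpSpecializedSM120, ScheduleSM10x>;
};

static_assert(std::is_same_v<SelectEpilogue<2, false>::Schedule, TmaWarpSpecialized2Sm>);
static_assert(std::is_same_v<SelectEpilogue<1, false>::Schedule, TmaWarpSpecialized1Sm>);
static_assert(std::is_same_v<SelectEpilogue<2, true>::Schedule, TmaWarpSpecializedSM120>);

int main() {}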
@@ -67,6 +67,7 @@ namespace kernels
 {
 namespace cutlass_kernels_oss
 {
+using namespace tensorrt_llm::kernels::cutlass_kernels;
 namespace tk = tensorrt_llm::common;
 namespace tkc = tensorrt_llm::cutlass_extensions;
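The added using-directive pulls the parent cutlass_kernels namespace into cutlass_kernels_oss, presumably so OSS-side code keeps resolving types that now live one level up without qualification. A minimal sketch of the effect, with a hypothetical empty struct standing in for the real TmaWarpSpecializedGroupedGemmInput:

namespace tensorrt_llm::kernels::cutlass_kernels
{
struct TmaWarpSpecializedGroupedGemmInput {}; // hypothetical stand-in for the real input type
}

namespace tensorrt_llm::kernels::cutlass_kernels_oss
{
// The using-directive makes the parent namespace's names visible here, so
// existing unqualified references keep compiling after the types moved out.
using namespace tensorrt_llm::kernels::cutlass_kernels;

void launcher(TmaWarpSpecializedGroupedGemmInput const&) {} // found via the directive
}

int main()
{
    tensorrt_llm::kernels::cutlass_kernels_oss::launcher({});
}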
@@ -105,7 +105,14 @@ void dispatchMoeGemmSelectBiasTmaWarpSpecialized(TmaWarpSpecializedGroupedGemmIn
 #ifndef COMPILE_BLACKWELL_SM103_TMA_GROUPED_GEMMS
     else if constexpr (Arch::kMinComputeCapability == 103)
     {
-        TLLM_THROW("Please recompile with support for blackwell by passing 103-real as an arch to build_wheel.py.");
+        static bool first_time = true;
+        if (first_time)
+        {
+            TLLM_LOG_WARNING(
+                "Falling back to sm100f version. For best performance please recompile with support for blackwell by "
+                "passing 103-real as an arch to build_wheel.py.");
+            first_time = false;
+        }
     }
 #endif
 #ifndef COMPILE_BLACKWELL_TMA_GROUPED_GEMMS
@@ -24,6 +24,18 @@
 #include <cuda_fp4.h>
 #endif

+#if !defined(CUTLASS_ARCH_MMA_SM103_SUPPORTED)
+namespace cutlass::arch
+{
+using Sm103 = Sm100;
+}
+
+namespace cutlass::gemm
+{
+using KernelPtrArrayTmaWarpSpecialized1SmBlockScaled3xOmmaVs16Sm103 = void;
+using KernelPtrArrayTmaWarpSpecialized2SmBlockScaled3xOmmaVs16Sm103 = void;
+} // namespace cutlass::gemm
+#endif
 namespace tensorrt_llm::kernels::cutlass_kernels
 {
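The new preprocessor block covers builds whose CUTLASS toolkit lacks SM103 support: Sm103 is aliased to Sm100 and the 3x-FP4 block-scaled kernel schedules are aliased to void, so declarations that merely name them stay well-formed. A sketch of that stub-alias technique with hypothetical names (HYPOTHETICAL_SM103_SUPPORTED stands in for CUTLASS_ARCH_MMA_SM103_SUPPORTED; this is not the CUTLASS API):

#include <type_traits>

struct RealSm103Schedule {}; // hypothetical: the schedule a full toolkit would provide

#if defined(HYPOTHETICAL_SM103_SUPPORTED)
using Sm103Schedule = RealSm103Schedule;
#else
using Sm103Schedule = void; // void placeholder keeps declarations compiling
#endif

template <typename Schedule>
void dispatch()
{
    if constexpr (std::is_void_v<Schedule>)
    {
        // Unsupported build: the stub is detected here, so code can fall back
        // or warn instead of instantiating a nonexistent kernel.
    }
    else
    {
        // Supported build: safe to instantiate kernels using Schedule here.
    }
}

int main()
{
    dispatch<Sm103Schedule>(); // compiles either way; branch chosen at compile time
}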
@@ -26,8 +26,8 @@ include(GoogleTest)
 include_directories(
   ${PROJECT_SOURCE_DIR}/tensorrt_llm/cutlass_extensions/include
   ${PROJECT_SOURCE_DIR}/include
-  ${3RDPARTY_DIR}/dynamic-kernel-generator/cutlass/include
-  ${3RDPARTY_DIR}/dynamic-kernel-generator/cutlass/tools/util/include
+  ${3RDPARTY_DIR}/cutlass/include
+  ${3RDPARTY_DIR}/cutlass/tools/util/include
   ${PROJECT_SOURCE_DIR}/tests/batch_manager
   ${PROJECT_SOURCE_DIR}/tests/utils)
@@ -217,7 +217,7 @@ target_include_directories(
   ${CUDA_INCLUDE_DIRS}
   ${CUDNN_ROOT_DIR}/include
   ${NCCL_INCLUDE_DIR}
-  ${3RDPARTY_DIR}/dynamic-kernel-generator/cutlass/include
+  ${3RDPARTY_DIR}/cutlass/include
   ${MPI_INCLUDE_PATH}
   ${COMMON_HEADER_DIR})