From ecea71ca7acbc4f0036448ef006f30dbde9c6b10 Mon Sep 17 00:00:00 2001 From: Jonas Li <6110159+longlee0622@users.noreply.github.com> Date: Wed, 24 Dec 2025 15:33:25 +0800 Subject: [PATCH] [None][chore] Update tinygemm kernel name (#10248) Signed-off-by: Jonas Li <6110159+longlee0622@users.noreply.github.com> --- cpp/tensorrt_llm/kernels/tinygemm2/tinygemm2_cuda.cu | 6 +++--- cpp/tensorrt_llm/kernels/tinygemm2/tinygemm2_kernel.cuh | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/tensorrt_llm/kernels/tinygemm2/tinygemm2_cuda.cu b/cpp/tensorrt_llm/kernels/tinygemm2/tinygemm2_cuda.cu index ee815ec8dd..6832e65efc 100644 --- a/cpp/tensorrt_llm/kernels/tinygemm2/tinygemm2_cuda.cu +++ b/cpp/tensorrt_llm/kernels/tinygemm2/tinygemm2_cuda.cu @@ -61,7 +61,7 @@ void launch_tinygemm2(__nv_bfloat16* gA, __nv_bfloat16* gB, __nv_bfloat16* gC, _ int smem_size = STAGES * STAGE_UNROLL * (TILE_M * TILE_K * sizeof(__nv_bfloat16) + TILE_N * TILE_K * sizeof(__nv_bfloat16)); - gpuErrChk(cudaFuncSetAttribute(kernel, + gpuErrChk(cudaFuncSetAttribute(tinygemm_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); int tiles_m = (output_features + TILE_M - 1) / TILE_M; @@ -82,8 +82,8 @@ void launch_tinygemm2(__nv_bfloat16* gA, __nv_bfloat16* gB, __nv_bfloat16* gC, _ attrs[0].val.programmaticStreamSerializationAllowed = 1; config.numAttrs = 1; - cudaLaunchKernelEx(&config, &kernel, gC, gA, gB, - bias, output_features, batch_size, input_features, weight_map, activation_map, nullptr); + cudaLaunchKernelEx(&config, &tinygemm_kernel, + gC, gA, gB, bias, output_features, batch_size, input_features, weight_map, activation_map, nullptr); } torch::Tensor tinygemm2_cuda_forward(torch::Tensor input, torch::Tensor weight, torch::Tensor bias) diff --git a/cpp/tensorrt_llm/kernels/tinygemm2/tinygemm2_kernel.cuh b/cpp/tensorrt_llm/kernels/tinygemm2/tinygemm2_kernel.cuh index cc76f35cc0..377b63452d 100644 --- a/cpp/tensorrt_llm/kernels/tinygemm2/tinygemm2_kernel.cuh +++ b/cpp/tensorrt_llm/kernels/tinygemm2/tinygemm2_kernel.cuh @@ -172,7 +172,7 @@ struct Profile }; template -__global__ __launch_bounds__(384, 1) void kernel(__nv_bfloat16* output, __nv_bfloat16* weights, +__global__ __launch_bounds__(384, 1) void tinygemm_kernel(__nv_bfloat16* output, __nv_bfloat16* weights, __nv_bfloat16* activations, __nv_bfloat16* bias, int M, int N, int K, const __grid_constant__ CUtensorMap weight_map, const __grid_constant__ CUtensorMap activation_map, Profile* profile = nullptr)