diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h b/cpp/tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h
index 5403e658e0..411013aa26 100644
--- a/cpp/tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h
+++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h
@@ -17,14 +17,22 @@
 #pragma once
 
 #include <NvInferRuntime.h>
-#include <cuda_bf16.h>
+
+#include "cutlass/half.h"
 #include <cuda_fp16.h>
 
+#include "cutlass/bfloat16.h"
+#include <cuda_bf16.h>
+
+#include "cutlass/float8.h"
 #include <cuda_fp8.h>
 
-#include "cutlass/bfloat16.h"
-#include "cutlass/float8.h"
-#include "cutlass/half.h"
+#include "cutlass/float_subbyte.h"
+// <cuda_fp4.h> is only needed by the __nv_fp4_e2m1 adapters below, which are themselves
+// compiled only when ENABLE_FP4 is defined, so keep the include behind the same guard.
+#if defined(ENABLE_FP4)
+#include <cuda_fp4.h>
+#endif
 
 namespace tensorrt_llm
 {
@@ -59,6 +67,12 @@ struct CutlassType<nvinfer1::DataType::kFP8>
     using type = cutlass::float_e4m3_t;
 };
 
+template <>
+struct CutlassType<nvinfer1::DataType::kFP4>
+{
+    using type = cutlass::float_e2m1_t;
+};
+
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 // Tllm to Cutlass
 
@@ -96,6 +110,14 @@ struct TllmToCutlassTypeAdapter<__nv_fp8_e5m2>
 };
 #endif
 
+#if defined(ENABLE_FP4)
+template <>
+struct TllmToCutlassTypeAdapter<__nv_fp4_e2m1>
+{
+    using type = cutlass::float_e2m1_t;
+};
+#endif
+
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 // Cutlass to Tllm
 
@@ -133,6 +155,14 @@ struct CutlassToTllmTypeAdapter<cutlass::float_e5m2_t>
 };
 #endif
 
+#if defined(ENABLE_FP4)
+template <>
+struct CutlassToTllmTypeAdapter<cutlass::float_e2m1_t>
+{
+    using type = __nv_fp4_e2m1;
+};
+#endif
+
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 } // namespace cutlass_kernels
diff --git a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h
deleted file mode 100644
index f8f0b7f07c..0000000000
--- a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-#include "tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h"
-
-#ifdef ENABLE_FP4
-#include <cuda_fp4.h>
-#endif
-// We forward declare so we don't have to pull in a million cutlass includes
-namespace cutlass
-{
-// FP4 and FP6 types
-struct float_e2m1_t;
-struct float_e3m2_t;
-struct float_ue4m3_t;
-} // namespace cutlass
-
-namespace tensorrt_llm
-{
-namespace kernels
-{
-namespace cutlass_kernels
-{
-#if defined(ENABLE_FP4)
-template <>
-struct TllmToCutlassTypeAdapter<__nv_fp4_e2m1>
-{
-    using type = cutlass::float_e2m1_t;
-};
-#endif
-
-#if defined(ENABLE_FP4)
-template <>
-struct CutlassToTllmTypeAdapter<cutlass::float_e2m1_t>
-{
-    using type = __nv_fp4_e2m1;
-};
-#endif
-
-#if defined(ENABLE_FP4)
-template <>
-struct CutlassType<nvinfer1::DataType::kFP4>
-{
-    using type = cutlass::float_e2m1_t;
-};
-#endif
-
-} // namespace cutlass_kernels
-} // namespace kernels
-} // namespace tensorrt_llm
diff --git a/cpp/tensorrt_llm/plugins/gemmAllReducePlugin/gemmAllReducePlugin.cpp b/cpp/tensorrt_llm/plugins/gemmAllReducePlugin/gemmAllReducePlugin.cpp
index 5274cd310b..d4a851f12f 100644
--- a/cpp/tensorrt_llm/plugins/gemmAllReducePlugin/gemmAllReducePlugin.cpp
+++ b/cpp/tensorrt_llm/plugins/gemmAllReducePlugin/gemmAllReducePlugin.cpp
@@ -16,7 +16,7 @@
  */
 #include "gemmAllReducePlugin.h"
 #include "tensorrt_llm/common/assert.h"
-#include "tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h"
+#include "tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h"
 #include "tensorrt_llm/plugins/common/pluginUtils.h"
 
 #include <unistd.h>
diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy
index c29bbb29ea..324715b5a2 100644
--- a/jenkins/L0_Test.groovy
+++ b/jenkins/L0_Test.groovy
@@ -846,7 +846,7 @@ def runLLMBuildFromPackage(pipeline, cpu_arch, reinstall_dependencies=false, whe
 
         # Folders and their allowed files
         declare -A ALLOWED=(
-            ["./tensorrt_llm/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src"]="internal_cutlass_type_conversion.h"
+            ["./tensorrt_llm/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/src"]=""
             ["./tensorrt_llm/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/src"]=""
         )
 
diff --git a/jenkins/license_cpp.json b/jenkins/license_cpp.json
index a9435eb22c..fe0ad02d6b 100644
--- a/jenkins/license_cpp.json
+++ b/jenkins/license_cpp.json
@@ -20,19 +20,12 @@
             ],
             "license_name": "apache"
         },
-        {
-            "include_re": "^tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h",
-            "license_name": "apache"
-        },
         {
             "include_re": "^tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/src/",
             "license_name": "nvidia"
         },
         {
             "include_re": "^tensorrt_llm/kernels/internal_cutlass_kernels/src/",
-            "exclude_re": [
-                "^tensorrt_llm/kernels/internal_cutlass_kernels/src/internal_cutlass_type_conversion.h"
-            ],
             "license_name": "nvidia"
         }
     ],