diff --git a/README.md b/README.md index 47b03b3351..a9e69f2f3a 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ TensorRT LLM * [08/01] Scaling Expert Parallelism in TensorRT LLM (Part 2: Performance Status and Optimization) ✨ [➡️ link](./docs/source/blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.md) -* [07/26] N-Gram Speculative Decoding in TensorRT‑LLM +* [07/26] N-Gram Speculative Decoding in TensorRT LLM ✨ [➡️ link](./docs/source/blogs/tech_blog/blog7_NGram_performance_Analysis_And_Auto_Enablement.md) * [06/19] Disaggregated Serving in TensorRT LLM diff --git a/benchmarks/cpp/bertBenchmark.cpp b/benchmarks/cpp/bertBenchmark.cpp index 655feffe52..cc10a5b49e 100644 --- a/benchmarks/cpp/bertBenchmark.cpp +++ b/benchmarks/cpp/bertBenchmark.cpp @@ -135,7 +135,7 @@ void benchmarkBert(std::string const& modelName, std::filesystem::path const& da int main(int argc, char* argv[]) { - cxxopts::Options options("TensorRT-LLM C++ Runtime Benchmark", "TensorRT-LLM C++ Runtime Benchmark for BERT."); + cxxopts::Options options("TensorRT LLM C++ Runtime Benchmark", "TensorRT LLM C++ Runtime Benchmark for BERT."); options.add_options()("h,help", "Print usage"); options.add_options()( "m,model", "Model name specified for engines.", cxxopts::value()->default_value("bert_base")); diff --git a/benchmarks/cpp/disaggServerBenchmark.cpp b/benchmarks/cpp/disaggServerBenchmark.cpp index ab00980275..89efae4539 100644 --- a/benchmarks/cpp/disaggServerBenchmark.cpp +++ b/benchmarks/cpp/disaggServerBenchmark.cpp @@ -1145,7 +1145,7 @@ void benchmark(std::vector const& contextEngineDirs, int main(int argc, char* argv[]) { - cxxopts::Options options("TensorRT-LLm DisaggServer Benchmark"); + cxxopts::Options options("TensorRT LLM DisaggServer Benchmark"); options.add_options()("h,help", "Print usage"); options.add_options()("context_engine_dirs", "Directories that store context engines,separator is a ,", cxxopts::value>()); diff --git a/benchmarks/cpp/gptManagerBenchmark.cpp b/benchmarks/cpp/gptManagerBenchmark.cpp index a586610f15..7bfe10f0df 100644 --- a/benchmarks/cpp/gptManagerBenchmark.cpp +++ b/benchmarks/cpp/gptManagerBenchmark.cpp @@ -1055,7 +1055,7 @@ void benchmarkExecutor(std::optional const& decoderEngine int main(int argc, char* argv[]) { cxxopts::Options options( - "TensorRT-LLM BatchManager Benchmark", "TensorRT-LLM BatchManager Benchmark for GPT and GPT-like models."); + "TensorRT LLM BatchManager Benchmark", "TensorRT LLM BatchManager Benchmark for GPT and GPT-like models."); options.add_options()("h,help", "Print usage"); options.add_options()("engine_dir, decoder_engine_dir", "Directory that store the engines of decoder models.", cxxopts::value()); diff --git a/cpp/include/tensorrt_llm/deep_gemm/compiler.cuh b/cpp/include/tensorrt_llm/deep_gemm/compiler.cuh index 8c00e939d5..8ec9f2ed42 100644 --- a/cpp/include/tensorrt_llm/deep_gemm/compiler.cuh +++ b/cpp/include/tensorrt_llm/deep_gemm/compiler.cuh @@ -217,7 +217,7 @@ std::vector getJitIncludeDirs() } else { - TLLM_LOG_WARNING("Failed to find TensorRT-LLM installation, DeepGEMM will be disabled."); + TLLM_LOG_WARNING("Failed to find TensorRT LLM installation, DeepGEMM will be disabled."); } } return includeDirs; diff --git a/cpp/tensorrt_llm/batch_manager/cacheTransceiver.cpp b/cpp/tensorrt_llm/batch_manager/cacheTransceiver.cpp index 48ac605a3f..81340ef463 100644 --- a/cpp/tensorrt_llm/batch_manager/cacheTransceiver.cpp +++ b/cpp/tensorrt_llm/batch_manager/cacheTransceiver.cpp @@ -165,7 +165,7 @@ 
CacheTransceiver::CacheTransceiver(kv_cache_manager::BaseKVCacheManager* cacheMa { void* ret = dllGetSym(handle, name); TLLM_CHECK_WITH_INFO(ret != nullptr, - "Unable to load UCX wrapper library symbol, possible cause is that TensorRT-LLM library is not " + "Unable to load UCX wrapper library symbol, possible cause is that TensorRT LLM library is not " "built with UCX support, please rebuild in UCX-enabled environment."); return ret; }; diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/fp4_gemm_template.h b/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/fp4_gemm_template.h index 34aa05ddc4..b12dbf47f6 100644 --- a/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/fp4_gemm_template.h +++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/fp4_gemm_template.h @@ -105,7 +105,7 @@ size_t dispatchNVFP4xNVFP4GemmClusterShapeSm100(T* D, void const* A, void const* break; default: throw std::runtime_error( - "[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM."); + "[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM."); break; } } @@ -146,15 +146,15 @@ size_t dispatchNVFP4xNVFP4GemmCTAShapeSm100(T* D, void const* A, void const* B, occupancy); break; case tkc::CutlassTileConfigSM100::Undefined: - throw std::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined."); + throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined."); break; case tkc::CutlassTileConfigSM100::ChooseWithHeuristic: throw std::runtime_error( - "[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by " + "[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by " "heuristic."); break; default: - throw std::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM."); + throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM."); break; } } @@ -177,7 +177,7 @@ size_t dispatchNVFP4xNVFP4GemmClusterShapeSm120(T* D, void const* A, void const* break; default: throw std::runtime_error( - "[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM."); + "[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM."); break; } } @@ -205,16 +205,16 @@ size_t dispatchNVFP4xNVFP4GemmCTAShapeSm120(T* D, void const* A, void const* B, occupancy); break; case tkc::CutlassTileConfigSM120::Undefined: - throw std::runtime_error("[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config undefined."); + throw std::runtime_error("[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config undefined."); break; case tkc::CutlassTileConfigSM120::ChooseWithHeuristic: throw std::runtime_error( - "[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config should have already been set by " + "[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config should have already been set by " "heuristic."); break; default: throw std::runtime_error( - "[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM."); + "[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM."); break; } } @@ -257,7 +257,7 @@ size_t dispatchMXFP8xMXFP4GemmClusterShapeSm100(T* D, void const* A, void const* break; default: throw std::runtime_error( - "[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] 
Config is invalid for FP4 GEMM."); + "[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM."); break; } } @@ -293,15 +293,15 @@ size_t dispatchMXFP8xMXFP4GemmCTAShapeSm100(T* D, void const* A, void const* B, occupancy); break; case tkc::CutlassTileConfigSM100::Undefined: - throw std::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined."); + throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined."); break; case tkc::CutlassTileConfigSM100::ChooseWithHeuristic: throw std::runtime_error( - "[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by " + "[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by " "heuristic."); break; default: - throw std::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM."); + throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM."); break; } } @@ -338,7 +338,7 @@ size_t CutlassFp4GemmRunner::dispatchToArch(T* D, void const* A, else { throw std::runtime_error( - "[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM"); + "[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM"); } } else if constexpr (fp4GemmType == FP4GemmType::W4A4_NVFP4_NVFP4) @@ -356,13 +356,13 @@ size_t CutlassFp4GemmRunner::dispatchToArch(T* D, void const* A, else { throw std::runtime_error( - "[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM"); + "[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM"); } } else { throw std::runtime_error( - "[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] FP4 Gemm type unsupported for CUTLASS FP4 GEMM"); + "[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] FP4 Gemm type unsupported for CUTLASS FP4 GEMM"); } } diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/mxfp8_mxfp4_gemm_template_sm100.h b/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/mxfp8_mxfp4_gemm_template_sm100.h index 129ff4f1a4..4191b337fe 100644 --- a/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/mxfp8_mxfp4_gemm_template_sm100.h +++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/mxfp8_mxfp4_gemm_template_sm100.h @@ -93,7 +93,7 @@ size_t genericMXFP8xMXFP4GemmKernelLauncher(void* D, void const* A, void const* int* occupancy) { throw std::runtime_error( - "[TensorRT-LLM Error][FP4 gemm Runner] TensorRT-LLM is not compiled with support for this Architecture."); + "[TensorRT LLM Error][FP4 gemm Runner] TensorRT LLM is not compiled with support for this Architecture."); } #else @@ -250,7 +250,7 @@ size_t genericMXFP8xMXFP4GemmKernelLauncher(void* D, void const* A, void const* { std::string errMsg = "SMEM size exceeds maximum allowed. Required " + std::to_string(smem_size) + ", got " + std::to_string(mMaxSmemSize); - throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); } /* // Return workspace size */ if (!A && !B && !D) @@ -261,28 +261,28 @@ size_t genericMXFP8xMXFP4GemmKernelLauncher(void* D, void const* A, void const* { std::string errMsg("Requested workspace size insufficient. 
Required " + std::to_string(gemm.get_workspace_size(args)) + ", got " + std::to_string(workspaceBytes)); - throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); } auto can_implement = gemm.can_implement(args); if (can_implement != cutlass::Status::kSuccess) { std::string errMsg = "MXFP8xMXFP4 Gemm cutlass kernel will fail for params. Error: " + std::string(cutlassGetStatusString(can_implement)); - throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); } auto initStatus = gemm.initialize(args, workspace, stream); if (initStatus != cutlass::Status::kSuccess) { std::string errMsg = "Failed to initialize cutlass MXFP8xMXFP4 gemm. Error: " + std::string(cutlassGetStatusString(initStatus)); - throw std::runtime_error("[TensorRT-LLM Error][MXFP8xMXFP4 gemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][MXFP8xMXFP4 gemm Runner] " + errMsg); } auto runStatus = gemm.run(args, workspace, stream, nullptr, tensorrt_llm::common::getEnvEnablePDL()); if (runStatus != cutlass::Status::kSuccess) { std::string errMsg = "Failed to run cutlass MXFP8xMXFP4 gemm. Error: " + std::string(cutlassGetStatusString(runStatus)); - throw std::runtime_error("[TensorRT-LLM Error][MXFP8xMXFP4 gemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][MXFP8xMXFP4 gemm Runner] " + errMsg); } return gemm.get_workspace_size(args); } diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/nvfp4_nvfp4_gemm_template_sm100.h b/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/nvfp4_nvfp4_gemm_template_sm100.h index da7b303351..c6c794f582 100644 --- a/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/nvfp4_nvfp4_gemm_template_sm100.h +++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/nvfp4_nvfp4_gemm_template_sm100.h @@ -107,7 +107,7 @@ size_t genericFp4GemmKernelLauncher(void* D, void const* A, void const* B, void int* occupancy) \ { \ throw std::runtime_error( \ - "[TensorRT-LLM Error][FP4 gemm Runner] TensorRT-LLM is not compiled with support for this Architecture."); \ + "[TensorRT LLM Error][FP4 gemm Runner] TensorRT LLM is not compiled with support for this Architecture."); \ } #else @@ -268,7 +268,7 @@ size_t genericFp4GemmKernelLauncher(void* D, void const* A, void const* B, void { \ std::string errMsg = "SMEM size exceeds maximum allowed. Required " + std::to_string(smem_size) + ", got " \ + std::to_string(mMaxSmemSize); \ - throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); \ + throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \ } \ /* // Return workspace size */ \ if (!A && !B && !D) \ @@ -279,28 +279,28 @@ size_t genericFp4GemmKernelLauncher(void* D, void const* A, void const* B, void { \ std::string errMsg("Requested workspace size insufficient. Required " \ + std::to_string(gemm.get_workspace_size(args)) + ", got " + std::to_string(workspaceBytes)); \ - throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); \ + throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \ } \ auto can_implement = gemm.can_implement(args); \ if (can_implement != cutlass::Status::kSuccess) \ { \ std::string errMsg = "FP4 Gemm cutlass kernel will fail for params. 
Error: " \ + std::string(cutlassGetStatusString(can_implement)); \ - throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); \ + throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \ } \ auto initStatus = gemm.initialize(args, workspace, stream); \ if (initStatus != cutlass::Status::kSuccess) \ { \ std::string errMsg \ = "Failed to initialize cutlass FP4 gemm. Error: " + std::string(cutlassGetStatusString(initStatus)); \ - throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); \ + throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \ } \ auto runStatus = gemm.run(args, workspace, stream, nullptr, tensorrt_llm::common::getEnvEnablePDL()); \ if (runStatus != cutlass::Status::kSuccess) \ { \ std::string errMsg \ = "Failed to run cutlass FP4 gemm. Error: " + std::string(cutlassGetStatusString(runStatus)); \ - throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); \ + throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \ } \ return gemm.get_workspace_size(args); \ } diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/nvfp4_nvfp4_gemm_template_sm120.h b/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/nvfp4_nvfp4_gemm_template_sm120.h index 542357f0bd..d9eeda8476 100644 --- a/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/nvfp4_nvfp4_gemm_template_sm120.h +++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/nvfp4_nvfp4_gemm_template_sm120.h @@ -69,7 +69,7 @@ size_t genericFp4GemmKernelLauncherSm120(void* D, void const* A, void const* B, int* occupancy) \ { \ throw std::runtime_error( \ - "[TensorRT-LLM Error][FP4 gemm Runner] TensorRT-LLM is not compiled with support for this Architecture."); \ + "[TensorRT LLM Error][FP4 gemm Runner] TensorRT LLM is not compiled with support for this Architecture."); \ } #else @@ -224,7 +224,7 @@ size_t genericFp4GemmKernelLauncherSm120(void* D, void const* A, void const* B, { \ std::string errMsg = "SMEM size exceeds maximum allowed. Required " + std::to_string(smem_size) + ", got " \ + std::to_string(mMaxSmemSize); \ - throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); \ + throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \ } \ /* // Return workspace size */ \ if (!A && !B && !D) \ @@ -235,7 +235,7 @@ size_t genericFp4GemmKernelLauncherSm120(void* D, void const* A, void const* B, { \ std::string errMsg("Requested workspace size insufficient. Required " \ + std::to_string(gemm.get_workspace_size(args)) + ", got " + std::to_string(workspaceBytes)); \ - throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); \ + throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \ } \ auto initStatus = gemm.initialize(args, workspace); \ if (initStatus != cutlass::Status::kSuccess) \ @@ -243,14 +243,14 @@ size_t genericFp4GemmKernelLauncherSm120(void* D, void const* A, void const* B, auto cudaErrMsg = cudaGetErrorString(cudaGetLastError()); \ std::string errMsg = "Failed to initialize cutlass FP4 gemm. 
Error: " \ + std::string(cutlass::cutlassGetStatusString(initStatus)) + " " + cudaErrMsg; \ - throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); \ + throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \ } \ auto runStatus = gemm.run(args, workspace, stream, nullptr, tensorrt_llm::common::getEnvEnablePDL()); \ if (runStatus != cutlass::Status::kSuccess) \ { \ std::string errMsg \ = "Failed to run cutlass FP4 gemm. Error: " + std::string(cutlass::cutlassGetStatusString(runStatus)); \ - throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); \ + throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \ } \ return gemm.get_workspace_size(args); \ } diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/fp8_rowwise_gemm/fp8_rowwise_gemm_template.h b/cpp/tensorrt_llm/kernels/cutlass_kernels/fp8_rowwise_gemm/fp8_rowwise_gemm_template.h index 350e5177a1..cda815d294 100644 --- a/cpp/tensorrt_llm/kernels/cutlass_kernels/fp8_rowwise_gemm/fp8_rowwise_gemm_template.h +++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/fp8_rowwise_gemm/fp8_rowwise_gemm_template.h @@ -75,7 +75,7 @@ size_t typedFp8RowwiseGemmKernelLauncher(Gemm gemm, typename Gemm::Arguments arg { std::string errMsg = "SMEM size exceeds maximum allowed. Required " + std::to_string(smem_size) + ", got " + std::to_string(mMaxSmemSize); - throw std::runtime_error("[TensorRT-LLM Error][fp8RowwiseGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][fp8RowwiseGemm Runner] " + errMsg); } // Return workspace size @@ -88,7 +88,7 @@ size_t typedFp8RowwiseGemmKernelLauncher(Gemm gemm, typename Gemm::Arguments arg { std::string errMsg("Requested workspace size insufficient. Required " + std::to_string(gemm.get_workspace_size(args)) + ", got " + std::to_string(workspaceBytes)); - throw std::runtime_error("[TensorRT-LLM Error][fp8RowwiseGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][fp8RowwiseGemm Runner] " + errMsg); } auto can_implement = gemm.can_implement(args); @@ -96,21 +96,21 @@ size_t typedFp8RowwiseGemmKernelLauncher(Gemm gemm, typename Gemm::Arguments arg { std::string errMsg = "fp8RowwiseGemm cutlass kernel not implemented given the params. Error: " + std::string(cutlassGetStatusString(can_implement)); - throw std::runtime_error("[TensorRT-LLM Error][fp8RowwiseGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][fp8RowwiseGemm Runner] " + errMsg); } auto initStatus = gemm.initialize(args, workspace, stream); if (initStatus != cutlass::Status::kSuccess) { std::string errMsg = "Failed to initialize. Error: " + std::string(cutlassGetStatusString(initStatus)); - throw std::runtime_error("[TensorRT-LLM Error][fp8RowwiseGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][fp8RowwiseGemm Runner] " + errMsg); } auto runStatus = gemm.run(stream); if (runStatus != cutlass::Status::kSuccess) { std::string errMsg = "Failed to run gemm. 
Error: " + std::string(cutlassGetStatusString(runStatus)); - throw std::runtime_error("[TensorRT-LLM Error][fp8RowwiseGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][fp8RowwiseGemm Runner] " + errMsg); } return gemm.get_workspace_size(args); } @@ -210,7 +210,7 @@ size_t dispatchGemmConfigSm89(void* D, void const* A, void const* B, void const* break; default: throw std::runtime_error( - "[TensorRT-LLM Error][CutlassFp8RowwiseGemmRunner][dispatchGemmConfigSm89] Config is invalid for " + "[TensorRT LLM Error][CutlassFp8RowwiseGemmRunner][dispatchGemmConfigSm89] Config is invalid for " "Fp8 Rowwise GEMM."); break; } @@ -299,16 +299,16 @@ size_t dispatchGemmToCutlassSm89(void* D, void const* A, void const* B, void con case tkc::CutlassTileConfig::Undefined: throw std::runtime_error( - "[TensorRT-LLm Error][CutlassFp8RowwiseGemmRunner][dispatchGemmToCutlassSm89] gemm config undefined."); + "[TensorRT LLM Error][CutlassFp8RowwiseGemmRunner][dispatchGemmToCutlassSm89] gemm config undefined."); break; case tkc::CutlassTileConfig::ChooseWithHeuristic: throw std::runtime_error( - "[TensorRT-LLm Error][CutlassFp8RowwiseGemmRunner][dispatchGemmToCutlassSm89] gemm config should have " + "[TensorRT LLM Error][CutlassFp8RowwiseGemmRunner][dispatchGemmToCutlassSm89] gemm config should have " "already been set by heuristic."); break; default: throw std::runtime_error( - "[TensorRT-LLm Error][CutlassFp8RowwiseGemmRunner][dispatchGemmToCutlassSm89] Config is invalid for " + "[TensorRT LLM Error][CutlassFp8RowwiseGemmRunner][dispatchGemmToCutlassSm89] Config is invalid for " "Fp8 Rowwise GEMM."); break; } @@ -379,7 +379,7 @@ size_t genericFp8RowwiseGemmKernelLauncherSm90(void* D, void const* A, void cons Gemm{}, args, D, A, B, C_bias, workspace, workspaceBytes, stream, occupancy); #else // COMPILE_HOPPER_TMA_GEMMS throw std::runtime_error( - "[TensorRT-LLm Error][Fp8RowwiseGemmKernelLauncherSm90] Please recompile with support for hopper by passing " + "[TensorRT LLM Error][Fp8RowwiseGemmKernelLauncherSm90] Please recompile with support for hopper by passing " "90-real as an arch to build_wheel.py."); #endif // COMPILE_HOPPER_TMA_GEMMS } @@ -418,7 +418,7 @@ size_t dispatchGemmConfigSm90(void* D, void const* A, void const* B, void const* break; default: throw std::runtime_error( - "[TensorRT-LLM Error][CutlassFp8RowwiseGemmRunner][dispatchGemmConfigSm90] Config is invalid for " + "[TensorRT LLM Error][CutlassFp8RowwiseGemmRunner][dispatchGemmConfigSm90] Config is invalid for " "Fp8 Rowwise GEMM."); break; } @@ -468,16 +468,16 @@ size_t dispatchGemmToCutlassSm90(void* D, void const* A, void const* B, void con break; case tkc::CutlassTileConfigSM90::Undefined: throw std::runtime_error( - "[TensorRT-LLm Error][CutlassFp8RowwiseGemmRunner][dispatchGemmToCutlassSm90] gemm config undefined."); + "[TensorRT LLM Error][CutlassFp8RowwiseGemmRunner][dispatchGemmToCutlassSm90] gemm config undefined."); break; case tkc::CutlassTileConfigSM90::ChooseWithHeuristic: throw std::runtime_error( - "[TensorRT-LLm Error][CutlassFp8RowwiseGemmRunner][dispatchGemmToCutlassSm90] gemm config should have " + "[TensorRT LLM Error][CutlassFp8RowwiseGemmRunner][dispatchGemmToCutlassSm90] gemm config should have " "already been set by heuristic."); break; default: throw std::runtime_error( - "[TensorRT-LLm Error][CutlassFp8RowwiseGemmRunner][dispatchGemmToCutlassSm90] Config is invalid for " + "[TensorRT LLM Error][CutlassFp8RowwiseGemmRunner][dispatchGemmToCutlassSm90] Config is invalid for " "Fp8 Rowwise GEMM."); 
break; } @@ -517,7 +517,7 @@ size_t CutlassFp8RowwiseGemmRunner::dispatchToArch(void* D, void const* A, vo #endif { throw std::runtime_error( - "[TensorRT-LLM Error][CutlassFp8RowwiseGemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS " + "[TensorRT LLM Error][CutlassFp8RowwiseGemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS " "Fp8 Rowwise GEMM"); } return 0; @@ -585,7 +585,7 @@ std::vector CutlassFp8RowwiseGemmRunner::getConfigs() else { throw std::runtime_error( - "[TensorRT-LLM Error][CutlassFp8RowwiseGemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS " + "[TensorRT LLM Error][CutlassFp8RowwiseGemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS " "Fp8 Rowwise GEMM"); } return candidateConfigs; diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h b/cpp/tensorrt_llm/kernels/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h index 07ea2923fb..37b0593fbf 100644 --- a/cpp/tensorrt_llm/kernels/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h +++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h @@ -209,7 +209,7 @@ void generic_mixed_gemm_kernelLauncher(ActivationType const* A, WeightType const { std::string err_msg = "fpA_intB cutlass kernel will fail for params. Error: " + std::string(cutlassGetStatusString(can_implement)); - throw std::runtime_error("[TensorRT-LLm Error][fpA_intB Runner] " + err_msg); + throw std::runtime_error("[TensorRT LLM Error][fpA_intB Runner] " + err_msg); } auto init_status = gemm.initialize(args, workspace, stream); @@ -217,7 +217,7 @@ void generic_mixed_gemm_kernelLauncher(ActivationType const* A, WeightType const { std::string err_msg = "Failed to initialize cutlass fpA_intB gemm. Error: " + std::string(cutlassGetStatusString(init_status)); - throw std::runtime_error("[TensorRT-LLm Error][fpA_intB Runner] " + err_msg); + throw std::runtime_error("[TensorRT LLM Error][fpA_intB Runner] " + err_msg); } auto run_status = gemm.run(stream); @@ -225,7 +225,7 @@ void generic_mixed_gemm_kernelLauncher(ActivationType const* A, WeightType const { std::string err_msg = "Failed to run cutlass fpA_intB gemm. 
Error: " + std::string(cutlassGetStatusString(run_status)); - throw std::runtime_error("[TensorRT-LLm Error][fpA_intB Runner] " + err_msg); + throw std::runtime_error("[TensorRT LLM Error][fpA_intB Runner] " + err_msg); } } @@ -247,14 +247,14 @@ void filter_and_run_mixed_gemm(ActivationType const* A, WeightType const* B, Sca // Multistage only supported on Ampere std::string err_msg = "Cutlass fpA_intB gemm not supported for arch " + std::to_string(arch::kMinComputeCapability) + " with stages set to " + std::to_string(Stages); - throw std::runtime_error("[TensorRT-LLm Error][filter_and_run_mixed_gemm] " + err_msg); + throw std::runtime_error("[TensorRT LLM Error][filter_and_run_mixed_gemm] " + err_msg); } else if constexpr (Stages == 2 && arch::kMinComputeCapability >= 89) { // Multistage only supported on Ampere std::string err_msg = "Cutlass fpA_intB gemm not supported for arch " + std::to_string(arch::kMinComputeCapability) + " with stages set to " + std::to_string(Stages); - throw std::runtime_error("[TensorRT-LLm Error][filter_and_run_mixed_gemm] " + err_msg); + throw std::runtime_error("[TensorRT LLM Error][filter_and_run_mixed_gemm] " + err_msg); } else if constexpr (cutlass::platform::is_same::value && arch::kMinComputeCapability < 89) @@ -262,7 +262,7 @@ void filter_and_run_mixed_gemm(ActivationType const* A, WeightType const* B, Sca // FP8 activation type only supported on Ada+ GPUs std::string err_msg = "Cutlass fpA_intB gemm not supported for arch " + std::to_string(arch::kMinComputeCapability) + " with activation type set to FP8"; - throw std::runtime_error("[TensorRT-LLm Error][filter_and_run_mixed_gemm] " + err_msg); + throw std::runtime_error("[TensorRT LLM Error][filter_and_run_mixed_gemm] " + err_msg); } else { @@ -301,7 +301,7 @@ void dispatch_gemm_config(ActivationType const* A, WeightType const* B, ScaleZer break; default: std::string err_msg = "dispatch_gemm_config does not support stages " + std::to_string(gemm_config.stages); - throw std::runtime_error("[TensorRT-LLm Error][dispatch_gemm_config] " + err_msg); + throw std::runtime_error("[TensorRT LLM Error][dispatch_gemm_config] " + err_msg); break; } } @@ -370,16 +370,16 @@ void dispatch_gemm_to_cutlass(ActivationType const* A, WeightType const* B, Scal C, m, n, k, group_size, gemm_config, workspace, workspace_bytes, stream, occupancy); break; case tkc::CutlassTileConfig::Undefined: - throw std::runtime_error("[TensorRT-LLm Error][fpA_intB][dispatch_gemm_to_cutlass] gemm config undefined."); + throw std::runtime_error("[TensorRT LLM Error][fpA_intB][dispatch_gemm_to_cutlass] gemm config undefined."); break; case tkc::CutlassTileConfig::ChooseWithHeuristic: throw std::runtime_error( - "[TensorRT-LLm Error][fpA_intB][dispatch_gemm_to_cutlass] gemm config should have already been set by " + "[TensorRT LLM Error][fpA_intB][dispatch_gemm_to_cutlass] gemm config should have already been set by " "heuristic."); break; default: throw std::runtime_error( - "[TensorRT-LLm Error][fpA_intB][dispatch_gemm_to_cutlass] Config is invalid for mixed type GEMM."); + "[TensorRT LLM Error][fpA_intB][dispatch_gemm_to_cutlass] Config is invalid for mixed type GEMM."); break; } } @@ -387,7 +387,7 @@ void dispatch_gemm_to_cutlass(ActivationType const* A, WeightType const* B, Scal { // This is not a limitation in CUTLASS. We just do not need to support this case. 
std::string err_msg = "The activation type must equal the scale, bias and output types on Ampere and earlier."; - throw std::runtime_error("[TensorRT-LLm Error][dispatch_gemm_to_cutlass] " + err_msg); + throw std::runtime_error("[TensorRT LLM Error][dispatch_gemm_to_cutlass] " + err_msg); } } @@ -439,7 +439,7 @@ void CutlassFpAIntBGemmRunner::value) { throw std::runtime_error( - "[TensorRT-LLM Error][CutlassFpAIntBGemmRunner][dispatch_to_arch] INT4xFP8 GEMM for Ada needs " + "[TensorRT LLM Error][CutlassFpAIntBGemmRunner][dispatch_to_arch] INT4xFP8 GEMM for Ada needs " "CUDA>=12.4"); } #endif @@ -459,7 +459,7 @@ void CutlassFpAIntBGemmRunner workspace_bytes) { - TLLM_LOG_ERROR("[TensorRT-LLm Error][fpA_intB Runner] given workspace size insufficient."); + TLLM_LOG_ERROR("[TensorRT LLM Error][fpA_intB Runner] given workspace size insufficient."); } auto can_implement = gemm.can_implement(args); @@ -258,7 +258,7 @@ void sm90_generic_mixed_gemm_kernelLauncher(ActivationType const* A, WeightType std::string err_msg = "fpA_intB cutlass kernel will fail for params. Error: " + std::string(cutlassGetStatusString(can_implement)); std::cout << err_msg << std::endl; - throw std::runtime_error("[TensorRT-LLm Error][fpA_intB Runner] " + err_msg); + throw std::runtime_error("[TensorRT LLM Error][fpA_intB Runner] " + err_msg); } auto init_status = gemm.initialize(args, workspace, stream); @@ -266,7 +266,7 @@ void sm90_generic_mixed_gemm_kernelLauncher(ActivationType const* A, WeightType { std::string err_msg = "Failed to initialize cutlass fpA_intB gemm. Error: " + std::string(cutlassGetStatusString(init_status)); - throw std::runtime_error("[TensorRT-LLm Error][fpA_intB Runner] " + err_msg); + throw std::runtime_error("[TensorRT LLM Error][fpA_intB Runner] " + err_msg); } auto run_status = gemm.run(stream); @@ -274,13 +274,13 @@ void sm90_generic_mixed_gemm_kernelLauncher(ActivationType const* A, WeightType { std::string err_msg = "Failed to run cutlass fpA_intB gemm. 
Error: " + std::string(cutlassGetStatusString(run_status)); - throw std::runtime_error("[TensorRT-LLm Error][fpA_intB Runner] " + err_msg); + throw std::runtime_error("[TensorRT LLM Error][fpA_intB Runner] " + err_msg); } } else { std::stringstream ss; - ss << "[TensorRT-LLm Error][fpA_intB Runner] Config (" << (int64_t) cute::size<0>(CTAShape{}) << "," + ss << "[TensorRT LLM Error][fpA_intB Runner] Config (" << (int64_t) cute::size<0>(CTAShape{}) << "," << (int64_t) cute::size<1>(CTAShape{}) << "," << (int64_t) cute::size<2>(CTAShape{}) << ") (" << (int64_t) cute::size<0>(ClusterShape{}) << "," << (int64_t) cute::size<1>(ClusterShape{}) << "," << (int64_t) cute::size<2>(ClusterShape{}) << ") not compiled with FAST_BUILD."; @@ -290,7 +290,7 @@ void sm90_generic_mixed_gemm_kernelLauncher(ActivationType const* A, WeightType #else // COMPILE_HOPPER_TMA_GEMMS throw std::runtime_error( - "[TensorRT-LLm Error][fpA_intB Runner] Please recompile with support for hopper by passing 90-real as an arch " + "[TensorRT LLM Error][fpA_intB Runner] Please recompile with support for hopper by passing 90-real as an arch " "to build_wheel.py."); #endif // COMPILE_HOPPER_TMA_GEMMS } diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/fused_gated_gemm/fused_gated_gemm_template.h b/cpp/tensorrt_llm/kernels/cutlass_kernels/fused_gated_gemm/fused_gated_gemm_template.h index d4fee178e0..ce175160a9 100644 --- a/cpp/tensorrt_llm/kernels/cutlass_kernels/fused_gated_gemm/fused_gated_gemm_template.h +++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/fused_gated_gemm/fused_gated_gemm_template.h @@ -67,7 +67,7 @@ size_t typedGemmGatedKernelLauncher(Gemm gemm, typename Gemm::Arguments args, vo { std::string errMsg = "SMEM size exceeds maximum allowed. Required " + std::to_string(smem_size) + ", got " + std::to_string(mMaxSmemSize); - throw std::runtime_error("[TensorRT-LLM Error][fusedGatedGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][fusedGatedGemm Runner] " + errMsg); } // Return workspace size @@ -80,7 +80,7 @@ size_t typedGemmGatedKernelLauncher(Gemm gemm, typename Gemm::Arguments args, vo { std::string errMsg("Requested workspace size insufficient. Required " + std::to_string(gemm.get_workspace_size(args)) + ", got " + std::to_string(workspaceBytes)); - throw std::runtime_error("[TensorRT-LLM Error][fusedGatedGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][fusedGatedGemm Runner] " + errMsg); } auto can_implement = gemm.can_implement(args); @@ -88,21 +88,21 @@ size_t typedGemmGatedKernelLauncher(Gemm gemm, typename Gemm::Arguments args, vo { std::string errMsg = "fusedGatedGemm cutlass kernel not implemented given the params. Error: " + std::string(cutlassGetStatusString(can_implement)); - throw std::runtime_error("[TensorRT-LLM Error][fusedGatedGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][fusedGatedGemm Runner] " + errMsg); } auto initStatus = gemm.initialize(args, workspace, stream); if (initStatus != cutlass::Status::kSuccess) { std::string errMsg = "Failed to initialize. Error: " + std::string(cutlassGetStatusString(initStatus)); - throw std::runtime_error("[TensorRT-LLM Error][fusedGatedGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][fusedGatedGemm Runner] " + errMsg); } auto runStatus = gemm.run(stream); if (runStatus != cutlass::Status::kSuccess) { std::string errMsg = "Failed to run gemm. 
Error: " + std::string(cutlassGetStatusString(runStatus)); - throw std::runtime_error("[TensorRT-LLM Error][fusedGatedGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][fusedGatedGemm Runner] " + errMsg); } return gemm.get_workspace_size(args); } @@ -165,7 +165,7 @@ size_t genericGemmGatedKernelLauncherSm90(void* D, void const* A, void const* B, return typedGemmGatedKernelLauncher(Gemm{}, args, D, A, B, C_bias, workspace, workspaceBytes, stream, occupancy); #else // COMPILE_HOPPER_TMA_GEMMS throw std::runtime_error( - "[TensorRT-LLm Error][GemmGatedKernelLauncherSm90] Please recompile with support for hopper by passing 90-real " + "[TensorRT LLM Error][GemmGatedKernelLauncherSm90] Please recompile with support for hopper by passing 90-real " "as an arch to build_wheel.py."); #endif // COMPILE_HOPPER_TMA_GEMMS } @@ -204,7 +204,7 @@ size_t dispatchGemmConfigSm90(void* D, void const* A, void const* B, void const* break; default: throw std::runtime_error( - "[TensorRT-LLM Error][CutlassFusedGatedGemmRunner][dispatchGemmConfigSm90] Config is invalid for fused " + "[TensorRT LLM Error][CutlassFusedGatedGemmRunner][dispatchGemmConfigSm90] Config is invalid for fused " "gated GEMM."); break; } @@ -255,17 +255,17 @@ size_t dispatchGemmToCutlassSm90(void* D, void const* A, void const* B, void con break; case tkc::CutlassTileConfigSM90::Undefined: throw std::runtime_error( - "[TensorRT-LLm Error][CutlassFusedGatedGemmRunner][dispatchGemmToCutlassSm90] gemm config undefined."); + "[TensorRT LLM Error][CutlassFusedGatedGemmRunner][dispatchGemmToCutlassSm90] gemm config undefined."); break; case tkc::CutlassTileConfigSM90::ChooseWithHeuristic: throw std::runtime_error( - "[TensorRT-LLm Error][CutlassFusedGatedGemmRunner][dispatchGemmToCutlassSm90] gemm config should have " + "[TensorRT LLM Error][CutlassFusedGatedGemmRunner][dispatchGemmToCutlassSm90] gemm config should have " "already been set by " "heuristic."); break; default: throw std::runtime_error( - "[TensorRT-LLm Error][CutlassFusedGatedGemmRunner][dispatchGemmToCutlassSm90] Config is invalid for fused " + "[TensorRT LLM Error][CutlassFusedGatedGemmRunner][dispatchGemmToCutlassSm90] Config is invalid for fused " "gated GEMM."); break; } @@ -302,14 +302,14 @@ size_t CutlassFusedGatedGemmRunner::dispatchToArch(void* D, void const* A, vo #endif { throw std::runtime_error( - "[TensorRT-LLM Error][CutlassFusedGatedGemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS fused " + "[TensorRT LLM Error][CutlassFusedGatedGemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS fused " "gated GEMM"); } } else { throw std::runtime_error( - "[TensorRT-LLM Error][CutlassFusedGatedGemmRunner][GEMM Dispatch] dtype unsupported for CUTLASS fused " + "[TensorRT LLM Error][CutlassFusedGatedGemmRunner][GEMM Dispatch] dtype unsupported for CUTLASS fused " "gated " "GEMM"); } @@ -340,7 +340,7 @@ std::vector CutlassFusedGatedGemmRunner::getConfigs() if (mSm != 90) { throw std::runtime_error( - "[TensorRT-LLM Error][CutlassFusedGatedGemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS fused " + "[TensorRT LLM Error][CutlassFusedGatedGemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS fused " "gated GEMM"); } tkc::CutlassGemmConfig::CandidateConfigTypeParam config_type_param @@ -378,7 +378,7 @@ std::vector CutlassFusedGatedGemmRunner::getConfigs() else { throw std::runtime_error( - "[TensorRT-LLM Error][CutlassFusedGatedGemmRunner][GEMM Dispatch] dtype unsupported for CUTLASS fused " + "[TensorRT LLM 
Error][CutlassFusedGatedGemmRunner][GEMM Dispatch] dtype unsupported for CUTLASS fused " "gated " "GEMM"); } diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/int8_gemm/int8_gemm_template.h b/cpp/tensorrt_llm/kernels/cutlass_kernels/int8_gemm/int8_gemm_template.h index c44caae0fa..4d4916d563 100644 --- a/cpp/tensorrt_llm/kernels/cutlass_kernels/int8_gemm/int8_gemm_template.h +++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/int8_gemm/int8_gemm_template.h @@ -150,7 +150,7 @@ void genericInt8GemmKernelLauncher(int8_t const* A, int8_t const* B, tk::QuantMo { std::string errMsg = "int8gemm cutlass kernel will fail for params. Error: " + std::string(cutlassGetStatusString(can_implement)); - throw std::runtime_error("[TensorRT-LLM Error][int8gemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][int8gemm Runner] " + errMsg); } auto initStatus = gemm.initialize(args, workspace, stream); @@ -158,7 +158,7 @@ void genericInt8GemmKernelLauncher(int8_t const* A, int8_t const* B, tk::QuantMo { std::string errMsg = "Failed to initialize cutlass int8 gemm. Error: " + std::string(cutlassGetStatusString(initStatus)); - throw std::runtime_error("[TensorRT-LLM Error][int8gemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][int8gemm Runner] " + errMsg); } auto runStatus = gemm.run(stream); @@ -166,7 +166,7 @@ void genericInt8GemmKernelLauncher(int8_t const* A, int8_t const* B, tk::QuantMo { std::string errMsg = "Failed to run cutlass int8 gemm. Error: " + std::string(cutlassGetStatusString(runStatus)); - throw std::runtime_error("[TensorRT-LLM Error][int8gemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][int8gemm Runner] " + errMsg); } } @@ -180,7 +180,7 @@ struct dispatchStages TLLM_LOG_DEBUG(__PRETTY_FUNCTION__); std::string errMsg = "Cutlass int8 gemm. 
Not instantiates for arch " + std::to_string(arch::kMinComputeCapability) + " with stages set to " + std::to_string(Stages); - throw std::runtime_error("[TensorRT-LLM Error][dispatchStages::dispatch] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][dispatchStages::dispatch] " + errMsg); } }; @@ -248,7 +248,7 @@ void dispatchGemmConfig(int8_t const* A, int8_t const* B, tk::QuantMode quantOpt break; default: std::string errMsg = "dispatchGemmConfig does not support stages " + std::to_string(gemmConfig.stages); - throw std::runtime_error("[TensorRT-LLM Error][dispatch_gemm_config] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][dispatch_gemm_config] " + errMsg); break; } } @@ -288,16 +288,16 @@ void dispatchGemmToCutlass(int8_t const* A, int8_t const* B, tk::QuantMode quant quantOption, alphaCol, alphaRow, C, m, n, k, gemmConfig, workspace, workspaceBytes, stream, occupancy); break; case tkc::CutlassTileConfig::Undefined: - throw std::runtime_error("[TensorRT-LLM Error][int8][dispatch_gemm_to_cutlass] gemm config undefined."); + throw std::runtime_error("[TensorRT LLM Error][int8][dispatch_gemm_to_cutlass] gemm config undefined."); break; case tkc::CutlassTileConfig::ChooseWithHeuristic: throw std::runtime_error( - "[TensorRT-LLM Error][int8][dispatch_gemm_to_cutlass] gemm config should have already been set by " + "[TensorRT LLM Error][int8][dispatch_gemm_to_cutlass] gemm config should have already been set by " "heuristic."); break; default: throw std::runtime_error( - "[TensorRT-LLM Error][int8][dispatch_gemm_to_cutlass] Config is invalid for int8 GEMM."); + "[TensorRT LLM Error][int8][dispatch_gemm_to_cutlass] Config is invalid for int8 GEMM."); break; } } @@ -342,7 +342,7 @@ void CutlassInt8GemmRunner::dispatchToArch(int8_t const* A, int8_t const* B, else { throw std::runtime_error( - "[TensorRT-LLM Error][CutlassInt8GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS int8 GEMM"); + "[TensorRT LLM Error][CutlassInt8GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS int8 GEMM"); } } @@ -364,7 +364,7 @@ std::vector CutlassInt8GemmRunner::getConfigs() const if (mSm <= 70) { throw std::runtime_error( - "[TensorRT-LLM Error][CutlassInt8GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS int8 GEMM"); + "[TensorRT LLM Error][CutlassInt8GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS int8 GEMM"); } std::vector candidateConfigs = get_candidate_configs(mSm, SPLIT_K_LIMIT, config_type_param); diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/low_latency_gemm/fp8_low_latency_gemm_template.h b/cpp/tensorrt_llm/kernels/cutlass_kernels/low_latency_gemm/fp8_low_latency_gemm_template.h index 42a6beaf9d..2395650223 100644 --- a/cpp/tensorrt_llm/kernels/cutlass_kernels/low_latency_gemm/fp8_low_latency_gemm_template.h +++ b/cpp/tensorrt_llm/kernels/cutlass_kernels/low_latency_gemm/fp8_low_latency_gemm_template.h @@ -195,7 +195,7 @@ size_t genericFp8LowLatencyGemmKernelLauncherSm90(__nv_fp8_e4m3 const* A, __nv_f { std::string errMsg = "SMEM size exceeds maximum allowed. Required " + std::to_string(smem_size) + ", got " + std::to_string(mMaxSmemSize); - throw std::runtime_error("[TensorRT-LLM Error][Fp8LowLatencyGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][Fp8LowLatencyGemm Runner] " + errMsg); } // Return workspace size @@ -208,7 +208,7 @@ size_t genericFp8LowLatencyGemmKernelLauncherSm90(__nv_fp8_e4m3 const* A, __nv_f { std::string errMsg("Requested workspace size insufficient. 
Required " + std::to_string(gemm.get_workspace_size(arguments)) + ", got " + std::to_string(workspaceBytes)); - throw std::runtime_error("[TensorRT-LLM Error][Fp8LowLatencyGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][Fp8LowLatencyGemm Runner] " + errMsg); } auto can_implement = gemm.can_implement(arguments); @@ -216,26 +216,26 @@ size_t genericFp8LowLatencyGemmKernelLauncherSm90(__nv_fp8_e4m3 const* A, __nv_f { std::string errMsg = "Fp8LowLatencyGemm cutlass kernel not implemented given the params. Error: " + std::string(cutlassGetStatusString(can_implement)); - throw std::runtime_error("[TensorRT-LLM Error][Fp8LowLatencyGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][Fp8LowLatencyGemm Runner] " + errMsg); } auto initStatus = gemm.initialize(arguments, workspacePtr); if (initStatus != cutlass::Status::kSuccess) { std::string errMsg = "Failed to initialize. Error: " + std::string(cutlassGetStatusString(initStatus)); - throw std::runtime_error("[TensorRT-LLM Error][Fp8LowLatencyGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][Fp8LowLatencyGemm Runner] " + errMsg); } auto runStatus = gemm.run(stream, nullptr, pdl_overlap_ratio >= 0); if (runStatus != cutlass::Status::kSuccess) { std::string errMsg = "Failed to run gemm. Error: " + std::string(cutlassGetStatusString(runStatus)); - throw std::runtime_error("[TensorRT-LLM Error][Fp8LowLatencyGemm Runner] " + errMsg); + throw std::runtime_error("[TensorRT LLM Error][Fp8LowLatencyGemm Runner] " + errMsg); } return gemm.get_workspace_size(arguments); #else // COMPILE_HOPPER_TMA_GEMMS throw std::runtime_error( - "[TensorRT-LLm Error][genericFp8LowLatencyGemmKernelLauncherSm90] Please recompile with support for hopper by " + "[TensorRT LLM Error][genericFp8LowLatencyGemmKernelLauncherSm90] Please recompile with support for hopper by " "passing 90-real as an arch to build_wheel.py."); #endif // COMPILE_HOPPER_TMA_GEMMS } @@ -264,7 +264,7 @@ size_t dispatchLowLatencyGemmCultassKernelSchedSm90(__nv_fp8_e4m3 const* A, __nv break; default: throw std::runtime_error( - "[TensorRT-LLm Error][CutlassLowLatencyFp8GemmRunner][dispatchLowLatencyGemmCultassKernelSchedSm90] Config " + "[TensorRT LLM Error][CutlassLowLatencyFp8GemmRunner][dispatchLowLatencyGemmCultassKernelSchedSm90] Config " "is " "invalid for low latency fp8 gemm"); break; @@ -300,7 +300,7 @@ size_t dispatchLowLatencyGemmClusterShapeSm90(__nv_fp8_e4m3 const* A, __nv_fp8_e default: throw std::runtime_error( - "[TensorRT-LLm Error][CutlassLowLatencyFp8GemmRunner][dispatchLowLatencyGemmClusterShapeSm90] Config is " + "[TensorRT LLM Error][CutlassLowLatencyFp8GemmRunner][dispatchLowLatencyGemmClusterShapeSm90] Config is " "invalid for low latency fp8 gemm"); break; } @@ -369,19 +369,19 @@ size_t dispatchLowLatencyGemmToCutlassSm90(__nv_fp8_e4m3 const* A, __nv_fp8_e4m3 break; case tkc::CutlassTileConfigSM90::Undefined: throw std::runtime_error( - "[TensorRT-LLm Error][CutlassLowLatencyFp8GemmRunner][dispatchLowLatencyGemmToCutlassSm90] gemm config " + "[TensorRT LLM Error][CutlassLowLatencyFp8GemmRunner][dispatchLowLatencyGemmToCutlassSm90] gemm config " "undefined."); break; case tkc::CutlassTileConfigSM90::ChooseWithHeuristic: throw std::runtime_error( - "[TensorRT-LLm Error][CutlassLowLatencyFp8GemmRunner][dispatchLowLatencyGemmToCutlassSm90] gemm config " + "[TensorRT LLM Error][CutlassLowLatencyFp8GemmRunner][dispatchLowLatencyGemmToCutlassSm90] gemm config " "should have " "already been set by " "heuristic."); 
break; default: throw std::runtime_error( - "[TensorRT-LLm Error][CutlassLowLatencyFp8GemmRunner][dispatchLowLatencyGemmToCutlassSm90] Config is " + "[TensorRT LLM Error][CutlassLowLatencyFp8GemmRunner][dispatchLowLatencyGemmToCutlassSm90] Config is " "invalid for low latency fp8 gemm"); break; } @@ -413,7 +413,7 @@ size_t CutlassLowLatencyFp8GemmRunner::dispatchToArch(__nv_fp8_e4m3 const* A, { throw std::runtime_error( - "[TensorRT-LLM Error][CutlassLowLatencyFp8GemmRunner][GEMM Dispatch] dtype unsupported for CUTLASS Low " + "[TensorRT LLM Error][CutlassLowLatencyFp8GemmRunner][GEMM Dispatch] dtype unsupported for CUTLASS Low " "Latency Gemm"); } return 0; @@ -499,7 +499,7 @@ std::vector CutlassLowLatencyFp8GemmRunner::getConfigs() const if (mSm != 90) { throw std::runtime_error( - "[TensorRT-LLM Error][CutlassLowLatencyFp8GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP8 Low " + "[TensorRT LLM Error][CutlassLowLatencyFp8GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP8 Low " "Latency GEMM"); } tkc::CutlassGemmConfig::CandidateConfigTypeParam config_type_param diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/trtllmGen_bmm_export/BatchedGemmInterface.h b/cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/trtllmGen_bmm_export/BatchedGemmInterface.h index 53bd7bc33c..2720bf4232 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/trtllmGen_bmm_export/BatchedGemmInterface.h +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/trtllmGen_bmm_export/BatchedGemmInterface.h @@ -235,12 +235,12 @@ struct BatchedGemmData void const* mPtrBias{nullptr}; // The output tensor scaling factor for MxFp{4,8}, Fp8 and NvFp4 quantization. - // TensorRT-LLM API requires a scaling factor on the device. + // TensorRT LLM API requires a scaling factor on the device. // Shape is [B]. float const* mPtrScaleC{nullptr}; // The output gate scale for MxFp{4,8} and NvFp4 quantization. - // TensorRT-LLM API requires a scaling factor on the device. + // TensorRT LLM API requires a scaling factor on the device. // Shape is [B]. float const* mPtrScaleGate{nullptr}; diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/trtllmGen_bmm_export/KernelParams.h b/cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/trtllmGen_bmm_export/KernelParams.h index 0ebe9a94c8..56ea4561c7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/trtllmGen_bmm_export/KernelParams.h +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/trtllmGen_bmm_export/KernelParams.h @@ -214,12 +214,12 @@ struct KernelParams // ScaleC = SEncC // // The output tensor scaling factor for MxFp{4,8}, Fp8, NvFp4 and DeepSeek FP8 quantization. - // TensorRT-LLM API requires a scaling factor on the device. + // TensorRT LLM API requires a scaling factor on the device. // Shape is [B]. One scaling factor per tensor in batch. float const* ptrScaleC{nullptr}; // The output gate scale for MxFp{4,8}, Fp8, NvFp4 and DeepSeek FP8 quantization. - // TensorRT-LLM API requires a scaling factor on the device. + // TensorRT LLM API requires a scaling factor on the device. // Shape is [B]. One scaling factor per tensor in batch. 
float const* ptrScaleGate{nullptr}; diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemm/trtllmGen_gemm_export/GemmInterface.h b/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemm/trtllmGen_gemm_export/GemmInterface.h index 459d831e0b..f17d691002 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemm/trtllmGen_gemm_export/GemmInterface.h +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemm/trtllmGen_gemm_export/GemmInterface.h @@ -143,7 +143,7 @@ struct GemmData void const* mPtrPerTokenSfB{nullptr}; // The output tensor scaling factor for MxFp{4,8}, Fp8, NvFp4 and DeepSeek FP8 quantization. - // TensorRT-LLM API requires a scaling factor on the device. + // TensorRT LLM API requires a scaling factor on the device. // Shape is [1]. void* mPtrScaleC{nullptr}; }; diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemm/trtllmGen_gemm_export/KernelParams.h b/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemm/trtllmGen_gemm_export/KernelParams.h index 142e9728dc..0e0a02d16b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemm/trtllmGen_gemm_export/KernelParams.h +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemm/trtllmGen_gemm_export/KernelParams.h @@ -204,7 +204,7 @@ struct KernelParams void* ptrSfC; // The output tensor scaling factor for MxFp{4,8}, Fp8, NvFp4 and DeepSeek FP8 quantization. - // TensorRT-LLM API requires a scaling factor on the device. + // TensorRT LLM API requires a scaling factor on the device. // Shape is [1]. float const* ptrScaleC; diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemmGatedAct/trtllmGen_gatedAct_export/GemmGatedActInterface.h b/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemmGatedAct/trtllmGen_gatedAct_export/GemmGatedActInterface.h index a8087dc59a..a3f83cead9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemmGatedAct/trtllmGen_gatedAct_export/GemmGatedActInterface.h +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemmGatedAct/trtllmGen_gatedAct_export/GemmGatedActInterface.h @@ -133,11 +133,11 @@ struct GemmGatedActData void const* mPtrPerTokenSfB{nullptr}; // The output tensor scaling factor for MxFp{4,8}, Fp8, NvFp4 and DeepSeek FP8 quantization. - // TensorRT-LLM API requires a scaling factor on the device. + // TensorRT LLM API requires a scaling factor on the device. // Shape is [1]. void const* mPtrScaleC{nullptr}; // The output gate scale for MxFp{4,8}, NvFp4 and DeepSeek FP8 quantization. - // TensorRT-LLM API requires a scaling factor on the device. + // TensorRT LLM API requires a scaling factor on the device. // Shape is [1]. void const* mPtrScaleGate{nullptr}; }; diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemmGatedAct/trtllmGen_gatedAct_export/KernelParams.h b/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemmGatedAct/trtllmGen_gatedAct_export/KernelParams.h index 4a7bde2a17..b9bdd2c3a8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemmGatedAct/trtllmGen_gatedAct_export/KernelParams.h +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/gemmGatedAct/trtllmGen_gatedAct_export/KernelParams.h @@ -290,7 +290,7 @@ struct KernelParams // y = act(ptrScaleGate[0] * y1) * (ptrScaleC[0] * y2) // // The output tensor scaling factor for MxFp{4,8}, NvFp4 and DeepSeek FP8 quantization. - // TensorRT-LLM API requires a scaling factor on the device. + // TensorRT LLM API requires a scaling factor on the device. // Shape is [1]. float const* ptrScaleC; // The output gate scale for MxFp{4,8}, NvFp4 and DeepSeek FP8 quantization. 
diff --git a/cpp/tensorrt_llm/nanobind/bindings.cpp b/cpp/tensorrt_llm/nanobind/bindings.cpp index 89cfa72211..7961dac599 100644 --- a/cpp/tensorrt_llm/nanobind/bindings.cpp +++ b/cpp/tensorrt_llm/nanobind/bindings.cpp @@ -73,7 +73,7 @@ tr::SamplingConfig makeSamplingConfig(std::vector const& con NB_MODULE(TRTLLM_NB_MODULE, m) { - m.doc() = "TensorRT-LLM Python bindings for C++ runtime"; + m.doc() = "TensorRT LLM Python bindings for C++ runtime"; m.attr("binding_type") = "nanobind"; nb::set_leak_warnings(false); diff --git a/cpp/tensorrt_llm/plugins/bertAttentionPlugin/bertAttentionPlugin.cpp b/cpp/tensorrt_llm/plugins/bertAttentionPlugin/bertAttentionPlugin.cpp index e2fab9044c..4e6b29be99 100644 --- a/cpp/tensorrt_llm/plugins/bertAttentionPlugin/bertAttentionPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/bertAttentionPlugin/bertAttentionPlugin.cpp @@ -125,7 +125,7 @@ BertAttentionPlugin::BertAttentionPlugin(void const* data, size_t length) TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/cudaStreamPlugin/cudaStreamPlugin.cpp b/cpp/tensorrt_llm/plugins/cudaStreamPlugin/cudaStreamPlugin.cpp index bf3bdc0297..802e828c92 100644 --- a/cpp/tensorrt_llm/plugins/cudaStreamPlugin/cudaStreamPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/cudaStreamPlugin/cudaStreamPlugin.cpp @@ -48,7 +48,7 @@ CudaStreamPlugin::CudaStreamPlugin(void const* data, size_t length) TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/eaglePlugin/eagleDecodeDraftTokensPlugin.cpp b/cpp/tensorrt_llm/plugins/eaglePlugin/eagleDecodeDraftTokensPlugin.cpp index 9aa660dda5..899c93855b 100644 --- a/cpp/tensorrt_llm/plugins/eaglePlugin/eagleDecodeDraftTokensPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/eaglePlugin/eagleDecodeDraftTokensPlugin.cpp @@ -58,7 +58,7 @@ EagleDecodeDraftTokensPlugin::EagleDecodeDraftTokensPlugin(void const* data, siz read(d, mTopKSampling); TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", static_cast(length), static_cast(d - a)); } diff --git a/cpp/tensorrt_llm/plugins/eaglePlugin/eagleSampleAndAcceptDraftTokensPlugin.cpp b/cpp/tensorrt_llm/plugins/eaglePlugin/eagleSampleAndAcceptDraftTokensPlugin.cpp index a2ebf2ee55..5fb30f5837 100644 --- a/cpp/tensorrt_llm/plugins/eaglePlugin/eagleSampleAndAcceptDraftTokensPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/eaglePlugin/eagleSampleAndAcceptDraftTokensPlugin.cpp @@ -52,7 +52,7 @@ EagleSampleAndAcceptDraftTokensPlugin::EagleSampleAndAcceptDraftTokensPlugin(voi read(d, mDtype); TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). 
This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/fusedLayernormPlugin/fusedLayernormPlugin.cpp b/cpp/tensorrt_llm/plugins/fusedLayernormPlugin/fusedLayernormPlugin.cpp index 030895123a..541afdadc4 100644 --- a/cpp/tensorrt_llm/plugins/fusedLayernormPlugin/fusedLayernormPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/fusedLayernormPlugin/fusedLayernormPlugin.cpp @@ -47,7 +47,7 @@ FusedLayernormPlugin::FusedLayernormPlugin(void const* data, size_t length) read(d, mType); TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/gemmAllReducePlugin/gemmAllReducePlugin.cpp b/cpp/tensorrt_llm/plugins/gemmAllReducePlugin/gemmAllReducePlugin.cpp index 4cec38b046..08ee2af554 100644 --- a/cpp/tensorrt_llm/plugins/gemmAllReducePlugin/gemmAllReducePlugin.cpp +++ b/cpp/tensorrt_llm/plugins/gemmAllReducePlugin/gemmAllReducePlugin.cpp @@ -203,7 +203,7 @@ static GemmAllReducePluginOptions deserializeOptions(void const*& data, size_t l TLLM_CHECK_WITH_INFO(end == begin + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (end - begin)); diff --git a/cpp/tensorrt_llm/plugins/gemmPlugin/gemmPlugin.cpp b/cpp/tensorrt_llm/plugins/gemmPlugin/gemmPlugin.cpp index ac20e2cfd3..9e06ad01d1 100644 --- a/cpp/tensorrt_llm/plugins/gemmPlugin/gemmPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/gemmPlugin/gemmPlugin.cpp @@ -179,7 +179,7 @@ GemmPlugin::GemmPlugin(void const* data, size_t length, GemmPlugin::PluginProfil TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/gptAttentionCommon/gptAttentionCommon.cpp b/cpp/tensorrt_llm/plugins/gptAttentionCommon/gptAttentionCommon.cpp index 98e59c8fdd..fa160070e4 100644 --- a/cpp/tensorrt_llm/plugins/gptAttentionCommon/gptAttentionCommon.cpp +++ b/cpp/tensorrt_llm/plugins/gptAttentionCommon/gptAttentionCommon.cpp @@ -183,7 +183,7 @@ GPTAttentionPluginCommon::GPTAttentionPluginCommon(void const* data, size_t leng } TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). 
This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); TLLM_CHECK_WITH_INFO((smVersion() >= 80) || (mType != nvinfer1::DataType::kBF16), diff --git a/cpp/tensorrt_llm/plugins/identityPlugin/identityPlugin.cpp b/cpp/tensorrt_llm/plugins/identityPlugin/identityPlugin.cpp index 2174fd5332..109010e7a9 100644 --- a/cpp/tensorrt_llm/plugins/identityPlugin/identityPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/identityPlugin/identityPlugin.cpp @@ -35,7 +35,7 @@ IdentityPlugin::IdentityPlugin(void const* data, size_t length) char const *d = reinterpret_cast(data), *a = d; TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/layernormQuantizationPlugin/layernormQuantizationPlugin.cpp b/cpp/tensorrt_llm/plugins/layernormQuantizationPlugin/layernormQuantizationPlugin.cpp index f397044ad6..02a40a00c9 100644 --- a/cpp/tensorrt_llm/plugins/layernormQuantizationPlugin/layernormQuantizationPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/layernormQuantizationPlugin/layernormQuantizationPlugin.cpp @@ -61,7 +61,7 @@ LayernormQuantizationPlugin::LayernormQuantizationPlugin(void const* data, size_ read(d, mOutputType); TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/lookupPlugin/lookupPlugin.cpp b/cpp/tensorrt_llm/plugins/lookupPlugin/lookupPlugin.cpp index 3016c3c945..e4d26f9e5e 100644 --- a/cpp/tensorrt_llm/plugins/lookupPlugin/lookupPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/lookupPlugin/lookupPlugin.cpp @@ -48,7 +48,7 @@ LookupPlugin::LookupPlugin(void const* data, size_t length) read(d, mRank); TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/loraPlugin/loraPlugin.cpp b/cpp/tensorrt_llm/plugins/loraPlugin/loraPlugin.cpp index 763ca2f069..7a7d925a74 100644 --- a/cpp/tensorrt_llm/plugins/loraPlugin/loraPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/loraPlugin/loraPlugin.cpp @@ -78,7 +78,7 @@ LoraPlugin::LoraPlugin(void const* data, size_t length, LoraPlugin::PluginProfil TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). 
This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/lowLatencyGemmPlugin/lowLatencyGemmPlugin.cpp b/cpp/tensorrt_llm/plugins/lowLatencyGemmPlugin/lowLatencyGemmPlugin.cpp index fd412bc0d4..6165d6210f 100644 --- a/cpp/tensorrt_llm/plugins/lowLatencyGemmPlugin/lowLatencyGemmPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/lowLatencyGemmPlugin/lowLatencyGemmPlugin.cpp @@ -124,7 +124,7 @@ LowLatencyGemmPlugin::LowLatencyGemmPlugin(void const* data, size_t length, Plug mPluginProfiler->deserialize(d, mDims, mGemmId); TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/lowLatencyGemmSwigluPlugin/lowLatencyGemmSwigluPlugin.cpp b/cpp/tensorrt_llm/plugins/lowLatencyGemmSwigluPlugin/lowLatencyGemmSwigluPlugin.cpp index d2e8d370ec..a1aa11c2f1 100644 --- a/cpp/tensorrt_llm/plugins/lowLatencyGemmSwigluPlugin/lowLatencyGemmSwigluPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/lowLatencyGemmSwigluPlugin/lowLatencyGemmSwigluPlugin.cpp @@ -159,7 +159,7 @@ LowLatencyGemmSwigluPlugin::LowLatencyGemmSwigluPlugin( mPluginProfiler->deserialize(d, mDims, mGemmId); TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/mixtureOfExperts/mixtureOfExpertsPlugin.cpp b/cpp/tensorrt_llm/plugins/mixtureOfExperts/mixtureOfExpertsPlugin.cpp index 6db0e4a382..48b7c23b82 100644 --- a/cpp/tensorrt_llm/plugins/mixtureOfExperts/mixtureOfExpertsPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/mixtureOfExperts/mixtureOfExpertsPlugin.cpp @@ -175,7 +175,7 @@ MixtureOfExpertsPlugin::MixtureOfExpertsPlugin(void const* data, size_t length, TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/ncclPlugin/allgatherPlugin.cpp b/cpp/tensorrt_llm/plugins/ncclPlugin/allgatherPlugin.cpp index 5c8a35b808..4825dd51bb 100644 --- a/cpp/tensorrt_llm/plugins/ncclPlugin/allgatherPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/ncclPlugin/allgatherPlugin.cpp @@ -48,7 +48,7 @@ AllgatherPlugin::AllgatherPlugin(void const* data, size_t length) } TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). 
This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/ncclPlugin/allreducePlugin.cpp b/cpp/tensorrt_llm/plugins/ncclPlugin/allreducePlugin.cpp index 89e05fb61a..4241cf8d85 100644 --- a/cpp/tensorrt_llm/plugins/ncclPlugin/allreducePlugin.cpp +++ b/cpp/tensorrt_llm/plugins/ncclPlugin/allreducePlugin.cpp @@ -77,7 +77,7 @@ AllreducePlugin::AllreducePlugin(void const* data, size_t length) } TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); check(); diff --git a/cpp/tensorrt_llm/plugins/ncclPlugin/recvPlugin.cpp b/cpp/tensorrt_llm/plugins/ncclPlugin/recvPlugin.cpp index b6c3ba87c1..089ed31175 100644 --- a/cpp/tensorrt_llm/plugins/ncclPlugin/recvPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/ncclPlugin/recvPlugin.cpp @@ -45,7 +45,7 @@ RecvPlugin::RecvPlugin(void const* data, size_t length) read(d, mSrcRank); TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/ncclPlugin/reduceScatterPlugin.cpp b/cpp/tensorrt_llm/plugins/ncclPlugin/reduceScatterPlugin.cpp index 09263d1498..fe17c44fc4 100644 --- a/cpp/tensorrt_llm/plugins/ncclPlugin/reduceScatterPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/ncclPlugin/reduceScatterPlugin.cpp @@ -48,7 +48,7 @@ ReduceScatterPlugin::ReduceScatterPlugin(void const* data, size_t length) } TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/ncclPlugin/sendPlugin.cpp b/cpp/tensorrt_llm/plugins/ncclPlugin/sendPlugin.cpp index b1ca7165a5..81d66aa821 100644 --- a/cpp/tensorrt_llm/plugins/ncclPlugin/sendPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/ncclPlugin/sendPlugin.cpp @@ -46,7 +46,7 @@ SendPlugin::SendPlugin(void const* data, size_t length) read(d, mTgtRank); TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/qserveGemmPlugin/qserveGemmPlugin.cpp b/cpp/tensorrt_llm/plugins/qserveGemmPlugin/qserveGemmPlugin.cpp index 1ba984cbed..166f1cc32c 100644 --- a/cpp/tensorrt_llm/plugins/qserveGemmPlugin/qserveGemmPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/qserveGemmPlugin/qserveGemmPlugin.cpp @@ -64,7 +64,7 @@ QServeGemmPlugin::QServeGemmPlugin(void const* data, size_t length) TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). 
This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/quantizePerTokenPlugin/quantizePerTokenPlugin.cpp b/cpp/tensorrt_llm/plugins/quantizePerTokenPlugin/quantizePerTokenPlugin.cpp index 678c6eaa1a..23d0b80390 100644 --- a/cpp/tensorrt_llm/plugins/quantizePerTokenPlugin/quantizePerTokenPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/quantizePerTokenPlugin/quantizePerTokenPlugin.cpp @@ -51,7 +51,7 @@ QuantizePerTokenPlugin::QuantizePerTokenPlugin(void const* data, size_t length) read(d, mSumPerToken); TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/quantizeTensorPlugin/quantizeTensorPlugin.cpp b/cpp/tensorrt_llm/plugins/quantizeTensorPlugin/quantizeTensorPlugin.cpp index 7d4bbbb65d..cacb32b809 100644 --- a/cpp/tensorrt_llm/plugins/quantizeTensorPlugin/quantizeTensorPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/quantizeTensorPlugin/quantizeTensorPlugin.cpp @@ -35,7 +35,7 @@ QuantizeTensorPlugin::QuantizeTensorPlugin(void const* data, size_t length) char const *d = reinterpret_cast(data), *a = d; TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/quantizeToFP4Plugin/quantizeToFP4Plugin.cpp b/cpp/tensorrt_llm/plugins/quantizeToFP4Plugin/quantizeToFP4Plugin.cpp index b75e7cb066..7f88ca2e85 100644 --- a/cpp/tensorrt_llm/plugins/quantizeToFP4Plugin/quantizeToFP4Plugin.cpp +++ b/cpp/tensorrt_llm/plugins/quantizeToFP4Plugin/quantizeToFP4Plugin.cpp @@ -41,7 +41,7 @@ QuantizeToFP4Plugin::QuantizeToFP4Plugin(void const* data, size_t length) char const *d = reinterpret_cast(data), *a = d; TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/rmsnormQuantizationPlugin/rmsnormQuantizationPlugin.cpp b/cpp/tensorrt_llm/plugins/rmsnormQuantizationPlugin/rmsnormQuantizationPlugin.cpp index d8e2fbe595..16d0bf2dc3 100644 --- a/cpp/tensorrt_llm/plugins/rmsnormQuantizationPlugin/rmsnormQuantizationPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/rmsnormQuantizationPlugin/rmsnormQuantizationPlugin.cpp @@ -58,7 +58,7 @@ RmsnormQuantizationPlugin::RmsnormQuantizationPlugin(void const* data, size_t le read(d, mOutputType); TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). 
This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/smoothQuantGemmPlugin/smoothQuantGemmPlugin.cpp b/cpp/tensorrt_llm/plugins/smoothQuantGemmPlugin/smoothQuantGemmPlugin.cpp index 4235c808a4..9470a879a0 100644 --- a/cpp/tensorrt_llm/plugins/smoothQuantGemmPlugin/smoothQuantGemmPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/smoothQuantGemmPlugin/smoothQuantGemmPlugin.cpp @@ -98,7 +98,7 @@ SmoothQuantGemmPlugin::SmoothQuantGemmPlugin( TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/weightOnlyGroupwiseQuantMatmulPlugin/weightOnlyGroupwiseQuantMatmulPlugin.cpp b/cpp/tensorrt_llm/plugins/weightOnlyGroupwiseQuantMatmulPlugin/weightOnlyGroupwiseQuantMatmulPlugin.cpp index c9b779f4f3..85f0cf0112 100644 --- a/cpp/tensorrt_llm/plugins/weightOnlyGroupwiseQuantMatmulPlugin/weightOnlyGroupwiseQuantMatmulPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/weightOnlyGroupwiseQuantMatmulPlugin/weightOnlyGroupwiseQuantMatmulPlugin.cpp @@ -148,7 +148,7 @@ WeightOnlyGroupwiseQuantMatmulPlugin::WeightOnlyGroupwiseQuantMatmulPlugin( TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/plugins/weightOnlyQuantMatmulPlugin/weightOnlyQuantMatmulPlugin.cpp b/cpp/tensorrt_llm/plugins/weightOnlyQuantMatmulPlugin/weightOnlyQuantMatmulPlugin.cpp index 115f8c2a19..f3ed07fafa 100644 --- a/cpp/tensorrt_llm/plugins/weightOnlyQuantMatmulPlugin/weightOnlyQuantMatmulPlugin.cpp +++ b/cpp/tensorrt_llm/plugins/weightOnlyQuantMatmulPlugin/weightOnlyQuantMatmulPlugin.cpp @@ -126,7 +126,7 @@ WeightOnlyQuantMatmulPlugin::WeightOnlyQuantMatmulPlugin( TLLM_CHECK_WITH_INFO(d == a + length, "Expected length (%d) != real length (%d). 
This is often " - "caused by using different TensorRT-LLM version to build " + "caused by using different TensorRT LLM version to build " "engine and run engine.", (int) length, (int) (d - a)); } diff --git a/cpp/tensorrt_llm/pybind/bindings.cpp b/cpp/tensorrt_llm/pybind/bindings.cpp index 216baaa362..07c4943e79 100644 --- a/cpp/tensorrt_llm/pybind/bindings.cpp +++ b/cpp/tensorrt_llm/pybind/bindings.cpp @@ -67,7 +67,7 @@ tr::SamplingConfig makeSamplingConfig(std::vector const& con PYBIND11_MODULE(TRTLLM_PYBIND_MODULE, m) { - m.doc() = "TensorRT-LLM Python bindings for C++ runtime"; + m.doc() = "TensorRT LLM Python bindings for C++ runtime"; m.attr("binding_type") = "pybind"; // Create MpiComm binding first since it's used in the executor bindings diff --git a/cpp/tensorrt_llm/runtime/tllmRuntime.h b/cpp/tensorrt_llm/runtime/tllmRuntime.h index d254907267..dfef06d8b4 100644 --- a/cpp/tensorrt_llm/runtime/tllmRuntime.h +++ b/cpp/tensorrt_llm/runtime/tllmRuntime.h @@ -56,7 +56,7 @@ public: } /// @brief If multiple TensorRT optimization profiles are built in the engine, this function selects the - /// corresponding profile that is going to be used based on the runtime shape, for now, TensorRT-LLM only split + /// corresponding profile that is going to be used based on the runtime shape, for now, TensorRT LLM only split /// multiple profiles on the num_tokens dimension, hence the profile index is selected based on which profile /// handles the actual num_tokens /// @return The index of the selected TensorRT optimization profile diff --git a/cpp/tests/batch_manager/cacheTransceiverTest.cpp b/cpp/tests/batch_manager/cacheTransceiverTest.cpp index af916359d0..f6a7f2a139 100644 --- a/cpp/tests/batch_manager/cacheTransceiverTest.cpp +++ b/cpp/tests/batch_manager/cacheTransceiverTest.cpp @@ -330,7 +330,7 @@ protected: { void* ret = dllGetSym(handle, name); TLLM_CHECK_WITH_INFO(ret != nullptr, - "Unable to load UCX wrapper library symbol, possible cause is that TensorRT-LLM library is not " + "Unable to load UCX wrapper library symbol, possible cause is that TensorRT LLM library is not " "built with UCX support, please rebuild in UCX-enabled environment."); return ret; }; @@ -732,7 +732,7 @@ protected: { void* ret = dllGetSym(handle, name); TLLM_CHECK_WITH_INFO(ret != nullptr, - "Unable to load UCX wrapper library symbol, possible cause is that TensorRT-LLM library is not " + "Unable to load UCX wrapper library symbol, possible cause is that TensorRT LLM library is not " "built with UCX support, please rebuild in UCX-enabled environment."); return ret; }; diff --git a/cpp/tests/unit_tests/executor/ucxCommTest.cpp b/cpp/tests/unit_tests/executor/ucxCommTest.cpp index 5895ac0947..08b5c0f7fa 100644 --- a/cpp/tests/unit_tests/executor/ucxCommTest.cpp +++ b/cpp/tests/unit_tests/executor/ucxCommTest.cpp @@ -70,7 +70,7 @@ std::unique_ptr makeOneUcxConnectionManager( void* ret = dllGetSym(handle, name); TLLM_CHECK_WITH_INFO(ret != nullptr, - "Unable to load UCX wrapper library symbol, possible cause is that TensorRT-LLM library is not " + "Unable to load UCX wrapper library symbol, possible cause is that TensorRT LLM library is not " "built with UCX support, please rebuild in UCX-enabled environment."); return ret; }; diff --git a/cpp/tests/unit_tests/kernels/fused_gated_gemm/gemmSwigluKernelTestSm90Fp8.cu b/cpp/tests/unit_tests/kernels/fused_gated_gemm/gemmSwigluKernelTestSm90Fp8.cu index 5aa2f0a518..bdbec84ec8 100644 --- a/cpp/tests/unit_tests/kernels/fused_gated_gemm/gemmSwigluKernelTestSm90Fp8.cu +++ 
b/cpp/tests/unit_tests/kernels/fused_gated_gemm/gemmSwigluKernelTestSm90Fp8.cu @@ -243,7 +243,7 @@ Result run(std::string description, Options& options, Buffers& buffers) auto can_implement = device_gemm.can_implement(arguments); if (can_implement != cutlass::Status::kSuccess) { - throw std::runtime_error("[TensorRT-LLM Error][fusedGatedGemm Runner]"); + throw std::runtime_error("[TensorRT LLM Error][fusedGatedGemm Runner]"); } // Initialize CUTLASS kernel with arguments and workspace pointer @@ -481,7 +481,7 @@ int main(int argc, char const** argv) #ifdef COMPILE_HOPPER_TMA_GEMMS Result hopperFp8 = run(std::string("Hopper fp8 swiglu"), options, buffers); #else // COMPILE_HOPPER_TMA_GEMMS - std::cout << "[TensorRT-LLm Error][GemmSwigluKernelTestSm90Fp8] Please recompile with support for hopper by " + std::cout << "[TensorRT LLM Error][GemmSwigluKernelTestSm90Fp8] Please recompile with support for hopper by " "passing 90-real as an arch to build_wheel.py." << std::endl; #endif // COMPILE_HOPPER_TMA_GEMMS diff --git a/cpp/tests/unit_tests/kernels/fused_gated_gemm/gemmSwigluRunnerTest.cu b/cpp/tests/unit_tests/kernels/fused_gated_gemm/gemmSwigluRunnerTest.cu index 3db0d1a4cc..872a7deeee 100644 --- a/cpp/tests/unit_tests/kernels/fused_gated_gemm/gemmSwigluRunnerTest.cu +++ b/cpp/tests/unit_tests/kernels/fused_gated_gemm/gemmSwigluRunnerTest.cu @@ -338,7 +338,7 @@ TEST(GemmSwigluRunner, Sm90FP8) Result hopperFp8 = run("SM90 FP8 WS GEMM", options, buffers); EXPECT_TRUE(hopperFp8.passed); #else // COMPILE_HOPPER_TMA_GEMMS - std::cout << "[TensorRT-LLm Error][GemmSwigluRunnerTest] Please recompile with support for hopper by passing " + std::cout << "[TensorRT LLM Error][GemmSwigluRunnerTest] Please recompile with support for hopper by passing " "90-real as an arch to build_wheel.py." << std::endl; #endif // COMPILE_HOPPER_TMA_GEMMS diff --git a/examples/apps/fastapi_server.py b/examples/apps/fastapi_server.py index b2aa0baf2a..510b281a70 100755 --- a/examples/apps/fastapi_server.py +++ b/examples/apps/fastapi_server.py @@ -1,6 +1,6 @@ """ NOTE: This FastAPI-based server is only an example for demonstrating the usage -of TensorRT-LLM LLM API. It is not intended for production use. +of TensorRT LLM LLM API. It is not intended for production use. For production, use the `trtllm-serve` command. The server exposes OpenAI compatible API endpoints. 
""" diff --git a/examples/cpp_library/main.cpp b/examples/cpp_library/main.cpp index 20372f2270..7613a75a14 100644 --- a/examples/cpp_library/main.cpp +++ b/examples/cpp_library/main.cpp @@ -28,11 +28,11 @@ int main(int argc, char* argv[]) void log(nvinfer1::ILogger::Severity severity, char const* msg) noexcept override { if (severity <= nvinfer1::ILogger::Severity::kERROR) - std::cerr << "[TensorRT-LLM ERR]: " << msg << std::endl; + std::cerr << "[TensorRT LLM ERR]: " << msg << std::endl; else if (severity == nvinfer1::ILogger::Severity::kWARNING) - std::cerr << "[TensorRT-LLM WARNING]: " << msg << std::endl; + std::cerr << "[TensorRT LLM WARNING]: " << msg << std::endl; else - std::cout << "[TensorRT-LLM LOG]: " << msg << std::endl; + std::cout << "[TensorRT LLM LOG]: " << msg << std::endl; } }; diff --git a/examples/eagle/convert_checkpoint.py b/examples/eagle/convert_checkpoint.py index 1632e1e218..4b6e2d0e12 100644 --- a/examples/eagle/convert_checkpoint.py +++ b/examples/eagle/convert_checkpoint.py @@ -144,7 +144,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/generate_checkpoint_config.py b/examples/generate_checkpoint_config.py index 91252e4cfa..a11104eeba 100644 --- a/examples/generate_checkpoint_config.py +++ b/examples/generate_checkpoint_config.py @@ -12,7 +12,7 @@ def parse_arguments(): '--output_path', type=str, default='config.json', - help='The path to save the TensorRT-LLM checkpoint config.json file') + help='The path to save the TensorRT LLM checkpoint config.json file') parser.add_argument('--architecture', type=str, default='GPTForCausalLM') parser.add_argument('--dtype', type=str, diff --git a/examples/llm-api/llm_mgmn_llm_distributed.sh b/examples/llm-api/llm_mgmn_llm_distributed.sh index cd89de7b5d..bc6b6e16a6 100644 --- a/examples/llm-api/llm_mgmn_llm_distributed.sh +++ b/examples/llm-api/llm_mgmn_llm_distributed.sh @@ -29,7 +29,7 @@ # MOUNT_DIR: the directory to mount in the container # MOUNT_DEST: the destination directory in the container # WORKDIR: the working directory in the container -# SOURCE_ROOT: the path to the TensorRT-LLM source +# SOURCE_ROOT: the path to the TensorRT LLM source # PROLOGUE: the prologue to run before the script # LOCAL_MODEL: the local model directory to use, NOTE: downloading from HF is # not supported in Slurm mode, you need to download the model and put it in diff --git a/examples/llm-api/llm_mgmn_trtllm_bench.sh b/examples/llm-api/llm_mgmn_trtllm_bench.sh index 5169c00ad3..43c126368d 100644 --- a/examples/llm-api/llm_mgmn_trtllm_bench.sh +++ b/examples/llm-api/llm_mgmn_trtllm_bench.sh @@ -29,7 +29,7 @@ # MOUNT_DIR: the directory to mount in the container # MOUNT_DEST: the destination directory in the container # WORKDIR: the working directory in the container -# SOURCE_ROOT: the path to the TensorRT-LLM source +# SOURCE_ROOT: the path to the TensorRT LLM source # PROLOGUE: the prologue to run before the script # LOCAL_MODEL: the local model directory to use, NOTE: downloading from HF is # not supported in Slurm mode, you need to download the model and put it in diff --git a/examples/llm-api/llm_mgmn_trtllm_serve.sh b/examples/llm-api/llm_mgmn_trtllm_serve.sh index 05d9df88ce..a0cd8ce11f 100644 --- a/examples/llm-api/llm_mgmn_trtllm_serve.sh +++ b/examples/llm-api/llm_mgmn_trtllm_serve.sh @@ -29,7 +29,7 @@ 
# MOUNT_DIR: the directory to mount in the container # MOUNT_DEST: the destination directory in the container # WORKDIR: the working directory in the container -# SOURCE_ROOT: the path to the TensorRT-LLM source +# SOURCE_ROOT: the path to the TensorRT LLM source # PROLOGUE: the prologue to run before the script # LOCAL_MODEL: the local model directory to use, NOTE: downloading from HF is # not supported in Slurm mode, you need to download the model and put it in diff --git a/examples/llm-api/llm_sampling.py b/examples/llm-api/llm_sampling.py index 679ef17fc9..dcaeb552b7 100644 --- a/examples/llm-api/llm_sampling.py +++ b/examples/llm-api/llm_sampling.py @@ -161,7 +161,7 @@ def demonstrate_with_logprobs(prompt: str): def run_all_demonstrations(model_path: Optional[str] = None): """Run all sampling demonstrations.""" - print("🚀 TensorRT-LLM Sampling Techniques Showcase") + print("🚀 TensorRT LLM Sampling Techniques Showcase") print("=" * 50) # Use the first prompt for most demonstrations diff --git a/examples/medusa/convert_checkpoint.py b/examples/medusa/convert_checkpoint.py index fac23ddfd7..0a15e842bf 100644 --- a/examples/medusa/convert_checkpoint.py +++ b/examples/medusa/convert_checkpoint.py @@ -161,7 +161,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/contrib/baichuan/convert_checkpoint.py b/examples/models/contrib/baichuan/convert_checkpoint.py index 840394a93d..2a35bfbe48 100644 --- a/examples/models/contrib/baichuan/convert_checkpoint.py +++ b/examples/models/contrib/baichuan/convert_checkpoint.py @@ -53,7 +53,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/contrib/bloom/convert_checkpoint.py b/examples/models/contrib/bloom/convert_checkpoint.py index 7a562e0b4b..24549fa40b 100644 --- a/examples/models/contrib/bloom/convert_checkpoint.py +++ b/examples/models/contrib/bloom/convert_checkpoint.py @@ -156,7 +156,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=Path, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--calib_dataset', type=str, diff --git a/examples/models/contrib/cogvlm/convert_checkpoint.py b/examples/models/contrib/cogvlm/convert_checkpoint.py index 1573e54814..da99d8f006 100644 --- a/examples/models/contrib/cogvlm/convert_checkpoint.py +++ b/examples/models/contrib/cogvlm/convert_checkpoint.py @@ -190,7 +190,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/contrib/dbrx/convert_checkpoint.py b/examples/models/contrib/dbrx/convert_checkpoint.py index cc463c76e4..f9f8ac9cea 100644 --- a/examples/models/contrib/dbrx/convert_checkpoint.py +++ b/examples/models/contrib/dbrx/convert_checkpoint.py @@ -90,7 +90,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM 
checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/contrib/deepseek_v1/convert_checkpoint.py b/examples/models/contrib/deepseek_v1/convert_checkpoint.py index 024eae4e22..50751f4260 100644 --- a/examples/models/contrib/deepseek_v1/convert_checkpoint.py +++ b/examples/models/contrib/deepseek_v1/convert_checkpoint.py @@ -79,7 +79,7 @@ def parse_arguments(): type=str, default='trtllm_checkpoint', required=True, - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/contrib/deepseek_v2/convert_checkpoint.py b/examples/models/contrib/deepseek_v2/convert_checkpoint.py index 1baa4b7d06..39ef281f04 100755 --- a/examples/models/contrib/deepseek_v2/convert_checkpoint.py +++ b/examples/models/contrib/deepseek_v2/convert_checkpoint.py @@ -79,7 +79,7 @@ def parse_arguments(): type=str, default='trtllm_checkpoint', required=True, - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/contrib/dit/convert_checkpoint.py b/examples/models/contrib/dit/convert_checkpoint.py index 1e00295867..16b49ca312 100644 --- a/examples/models/contrib/dit/convert_checkpoint.py +++ b/examples/models/contrib/dit/convert_checkpoint.py @@ -87,7 +87,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument('--input_size', type=int, default=64, diff --git a/examples/models/contrib/falcon/convert_checkpoint.py b/examples/models/contrib/falcon/convert_checkpoint.py index 5012a55687..03584f9d7f 100644 --- a/examples/models/contrib/falcon/convert_checkpoint.py +++ b/examples/models/contrib/falcon/convert_checkpoint.py @@ -74,7 +74,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/contrib/gptj/convert_checkpoint.py b/examples/models/contrib/gptj/convert_checkpoint.py index 1f701c5bf5..58749ac5d8 100644 --- a/examples/models/contrib/gptj/convert_checkpoint.py +++ b/examples/models/contrib/gptj/convert_checkpoint.py @@ -61,7 +61,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/contrib/gptneox/convert_checkpoint.py b/examples/models/contrib/gptneox/convert_checkpoint.py index 506874a6c2..c78cc4824d 100644 --- a/examples/models/contrib/gptneox/convert_checkpoint.py +++ b/examples/models/contrib/gptneox/convert_checkpoint.py @@ -76,7 +76,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/contrib/grok/convert_checkpoint.py b/examples/models/contrib/grok/convert_checkpoint.py index d28d13d3eb..72adbbd5a0 100644 --- 
a/examples/models/contrib/grok/convert_checkpoint.py +++ b/examples/models/contrib/grok/convert_checkpoint.py @@ -110,7 +110,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/contrib/mmdit/convert_checkpoint.py b/examples/models/contrib/mmdit/convert_checkpoint.py index 824c994248..e2637a4014 100644 --- a/examples/models/contrib/mmdit/convert_checkpoint.py +++ b/examples/models/contrib/mmdit/convert_checkpoint.py @@ -37,7 +37,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/contrib/mpt/convert_checkpoint.py b/examples/models/contrib/mpt/convert_checkpoint.py index e096be3cdf..be73f9f760 100644 --- a/examples/models/contrib/mpt/convert_checkpoint.py +++ b/examples/models/contrib/mpt/convert_checkpoint.py @@ -124,7 +124,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/contrib/opt/convert_checkpoint.py b/examples/models/contrib/opt/convert_checkpoint.py index b5f2654dcd..1c7dcd08e8 100644 --- a/examples/models/contrib/opt/convert_checkpoint.py +++ b/examples/models/contrib/opt/convert_checkpoint.py @@ -76,7 +76,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/contrib/stdit/convert_checkpoint.py b/examples/models/contrib/stdit/convert_checkpoint.py index 0640d2fa4b..f62d2366cc 100644 --- a/examples/models/contrib/stdit/convert_checkpoint.py +++ b/examples/models/contrib/stdit/convert_checkpoint.py @@ -44,7 +44,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument('--caption_channels', type=int, default=4096, diff --git a/examples/models/core/bert/convert_checkpoint.py b/examples/models/core/bert/convert_checkpoint.py index ed98c27686..fded1b8287 100644 --- a/examples/models/core/bert/convert_checkpoint.py +++ b/examples/models/core/bert/convert_checkpoint.py @@ -47,7 +47,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/core/commandr/convert_checkpoint.py b/examples/models/core/commandr/convert_checkpoint.py index 6a4d08904f..b8cd903bd0 100644 --- a/examples/models/core/commandr/convert_checkpoint.py +++ b/examples/models/core/commandr/convert_checkpoint.py @@ -79,7 +79,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM 
checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/core/gemma/convert_checkpoint.py b/examples/models/core/gemma/convert_checkpoint.py index a79105c166..35ec3959ee 100644 --- a/examples/models/core/gemma/convert_checkpoint.py +++ b/examples/models/core/gemma/convert_checkpoint.py @@ -260,7 +260,7 @@ def main() -> None: trt_llm_config.query_pre_attn_scalar = ckpt_config.query_pre_attn_scalar trt_llm_config_dict = trt_llm_config.to_dict() - print(f"Determined TensorRT-LLM configuration {trt_llm_config_dict}") + print(f"Determined TensorRT LLM configuration {trt_llm_config_dict}") save_config(trt_llm_config, output_dir=args.output_model_dir, log=True) diff --git a/examples/models/core/glm-4-9b/convert_checkpoint.py b/examples/models/core/glm-4-9b/convert_checkpoint.py index 648567952f..e7c8dd2905 100644 --- a/examples/models/core/glm-4-9b/convert_checkpoint.py +++ b/examples/models/core/glm-4-9b/convert_checkpoint.py @@ -127,7 +127,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/core/gpt/convert_checkpoint.py b/examples/models/core/gpt/convert_checkpoint.py index 84fc17206c..fbc060d7db 100644 --- a/examples/models/core/gpt/convert_checkpoint.py +++ b/examples/models/core/gpt/convert_checkpoint.py @@ -132,7 +132,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/core/internlm2/convert_checkpoint.py b/examples/models/core/internlm2/convert_checkpoint.py index eb078400b7..2665ee6f3e 100644 --- a/examples/models/core/internlm2/convert_checkpoint.py +++ b/examples/models/core/internlm2/convert_checkpoint.py @@ -71,7 +71,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/core/llama/convert_checkpoint.py b/examples/models/core/llama/convert_checkpoint.py index e4858c815b..19ea7a769f 100644 --- a/examples/models/core/llama/convert_checkpoint.py +++ b/examples/models/core/llama/convert_checkpoint.py @@ -227,7 +227,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/core/llama/summarize_long.py b/examples/models/core/llama/summarize_long.py index cee2e07fdd..215ec93d56 100644 --- a/examples/models/core/llama/summarize_long.py +++ b/examples/models/core/llama/summarize_long.py @@ -51,7 +51,7 @@ def parse_args(): '--max_input_len', type=int, default=6400, - help='The max input length TensorRT-LLM engine was built with') + help='The max input length TensorRT LLM engine was built with') parser.add_argument('--log_level', type=str, default='info') parser.add_argument('--max_ite', type=int, default=5) parser.add_argument( @@ -392,7 +392,7 @@ def main(args): references=[hf_summary[ite][beam_idx][batch_idx]]) for beam_idx in range(args.num_beams): - 
logger.info(f"TensorRT-LLM beam {beam_idx} result") + logger.info(f"TensorRT LLM beam {beam_idx} result") computed_metrics_tensorrt_llm = metric_tensorrt_llm[ beam_idx].compute() for key in computed_metrics_tensorrt_llm.keys(): diff --git a/examples/models/core/mamba/convert_checkpoint.py b/examples/models/core/mamba/convert_checkpoint.py index 04c743324f..0afaf10b90 100644 --- a/examples/models/core/mamba/convert_checkpoint.py +++ b/examples/models/core/mamba/convert_checkpoint.py @@ -59,7 +59,7 @@ def parse_arguments(): '--output_dir', type=Path, default='mamba_tllm_checkpoint', - help='The path to save the mamba TensorRT-LLM checkpoint') + help='The path to save the mamba TensorRT LLM checkpoint') parser.add_argument('--log_level', type=str, default='info') parser.add_argument( '--workers', diff --git a/examples/models/core/mllama/convert_checkpoint.py b/examples/models/core/mllama/convert_checkpoint.py index fe8520d5ac..be0a054ad5 100644 --- a/examples/models/core/mllama/convert_checkpoint.py +++ b/examples/models/core/mllama/convert_checkpoint.py @@ -192,7 +192,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/core/multimodal/eval.py b/examples/models/core/multimodal/eval.py index 01f49ce20e..4a77ac9aa5 100644 --- a/examples/models/core/multimodal/eval.py +++ b/examples/models/core/multimodal/eval.py @@ -132,11 +132,11 @@ def load_hf_model(args): def load_trtllm_model(args): - profiler.start('load TensorRT-LLM model') + profiler.start('load TensorRT LLM model') trtllm_model = MultimodalModelRunner(args) - profiler.stop('load TensorRT-LLM model') + profiler.stop('load TensorRT LLM model') logger.info( - f'Load TensorRT-LLM model takes: {profiler.elapsed_time_in_sec("load TensorRT-LLM model")} sec' + f'Load TensorRT LLM model takes: {profiler.elapsed_time_in_sec("load TensorRT LLM model")} sec' ) return trtllm_model diff --git a/examples/models/core/nemotron_nas/convert_checkpoint.py b/examples/models/core/nemotron_nas/convert_checkpoint.py index eeedd8855d..c59b5b803d 100644 --- a/examples/models/core/nemotron_nas/convert_checkpoint.py +++ b/examples/models/core/nemotron_nas/convert_checkpoint.py @@ -56,7 +56,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/core/phi/convert_checkpoint.py b/examples/models/core/phi/convert_checkpoint.py index d8bc6df8fd..fa59115343 100644 --- a/examples/models/core/phi/convert_checkpoint.py +++ b/examples/models/core/phi/convert_checkpoint.py @@ -81,7 +81,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/core/qwen/convert_checkpoint.py b/examples/models/core/qwen/convert_checkpoint.py index 225b4989dc..0711e65ff0 100644 --- a/examples/models/core/qwen/convert_checkpoint.py +++ b/examples/models/core/qwen/convert_checkpoint.py @@ -137,7 +137,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save 
the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/core/qwen2audio/run.py b/examples/models/core/qwen2audio/run.py index 93e161c7e0..0c72eded66 100644 --- a/examples/models/core/qwen2audio/run.py +++ b/examples/models/core/qwen2audio/run.py @@ -316,7 +316,7 @@ class QWenInfer(object): stream.cuda_stream) stream.synchronize() audio_time = profiler.stop("Audio") / run_time - logger.info(f"TensorRT-LLM Audio latency: {audio_time:3f} sec ") + logger.info(f"TensorRT LLM Audio latency: {audio_time:3f} sec ") assert ok, "Runtime execution failed for audio session" @@ -567,7 +567,7 @@ class QWenInfer(object): print(f'Output(beam: {beam}): "{output_text}"') logger.info(f"Input length={input_lengths[b]}") logger.info(f"Output length={output_ids.shape}") - logger.info(f"TensorRT-LLM QWen time: {Qwen_time:3f} sec ") + logger.info(f"TensorRT LLM QWen time: {Qwen_time:3f} sec ") if isinstance(history, list): history.append({'role': 'assistant', 'content': output_text}) return output_text, past_audio_features diff --git a/examples/models/core/qwenvl/run.py b/examples/models/core/qwenvl/run.py index 06ce341a9a..7013217429 100644 --- a/examples/models/core/qwenvl/run.py +++ b/examples/models/core/qwenvl/run.py @@ -418,7 +418,7 @@ class QWenInfer(object): print(f'Output(beam: {beam}): "{output_text}"') logger.info(f"Input length={input_lengths[b]}") logger.info(f"Output length={output_ids.shape}") - logger.info(f"TensorRT-LLM QWen time: {Qwen_time:3f} sec ") + logger.info(f"TensorRT LLM QWen time: {Qwen_time:3f} sec ") history.append((query, output_text)) return output_text @@ -516,7 +516,7 @@ def vit_process(image_path, vit_engine_path, stream): ok = session_vit.run(visual_inputs, visual_outputs, stream) profiler.stop("ViT") Vit_time = profiler.elapsed_time_in_sec("ViT") / run_time - logger.info(f"TensorRT-LLM ViT latency: {Vit_time:3f} sec ") + logger.info(f"TensorRT LLM ViT latency: {Vit_time:3f} sec ") assert ok, "Runtime execution failed for vit session" diff --git a/examples/models/core/recurrentgemma/convert_checkpoint.py b/examples/models/core/recurrentgemma/convert_checkpoint.py index f7d9bf58c2..2f81bf1d2d 100644 --- a/examples/models/core/recurrentgemma/convert_checkpoint.py +++ b/examples/models/core/recurrentgemma/convert_checkpoint.py @@ -41,7 +41,7 @@ def parse_arguments(): "--output_dir", type=Path, default="recurrentgemma_tllm_checkpoint", - help="The path to save the recurrentgemma TensorRT-LLM checkpoint") + help="The path to save the recurrentgemma TensorRT LLM checkpoint") parser.add_argument("--log_level", type=str, default="info") args = parser.parse_args() return args @@ -506,11 +506,11 @@ def main(): ) trt_llm_config_dict = trt_llm_config.to_dict() - print(f"Determined TensorRT-LLM configuration {trt_llm_config_dict}") + print(f"Determined TensorRT LLM configuration {trt_llm_config_dict}") config_path = args.output_dir / "config.json" config_path.parent.mkdir(exist_ok=True, parents=True) - LOGGER.debug(f"Saving TensorRT-LLM configuration to {config_path}") + LOGGER.debug(f"Saving TensorRT LLM configuration to {config_path}") with config_path.open("w") as config_file: json.dump(trt_llm_config_dict, config_file, indent=4) diff --git a/examples/models/core/vit/convert_checkpoint.py b/examples/models/core/vit/convert_checkpoint.py index 5b1759e357..46f8e2b5e1 100644 --- a/examples/models/core/vit/convert_checkpoint.py +++ b/examples/models/core/vit/convert_checkpoint.py @@ -42,7 
+42,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--workers', type=int, diff --git a/examples/models/core/whisper/convert_checkpoint.py b/examples/models/core/whisper/convert_checkpoint.py index bd9bc1f44f..28dd4e9fac 100644 --- a/examples/models/core/whisper/convert_checkpoint.py +++ b/examples/models/core/whisper/convert_checkpoint.py @@ -62,7 +62,7 @@ def parse_arguments(): parser.add_argument('--output_dir', type=str, default='tllm_checkpoint', - help='The path to save the TensorRT-LLM checkpoint') + help='The path to save the TensorRT LLM checkpoint') parser.add_argument( '--use_weight_only', default=False, diff --git a/examples/openai_triton/manual_plugin/plugin.py b/examples/openai_triton/manual_plugin/plugin.py index a7d559b4be..7009caaeb6 100644 --- a/examples/openai_triton/manual_plugin/plugin.py +++ b/examples/openai_triton/manual_plugin/plugin.py @@ -35,7 +35,7 @@ def _load_triton_plugin_lib(): plugin_lib = triton_plugin_dir / 'build/libtrt_llm_custom_plugins.so' handle = ctypes.CDLL(plugin_lib, mode=ctypes.RTLD_GLOBAL) if handle is None: - raise ImportError('TensorRT-LLM Triton Plugin is unavailable') + raise ImportError('TensorRT LLM Triton Plugin is unavailable') handle.initOpenAiTritonPlugins.argtypes = [ctypes.c_void_p, ctypes.c_char_p] handle.initOpenAiTritonPlugins.restype = ctypes.c_bool assert handle.initOpenAiTritonPlugins( diff --git a/examples/redrafter/convert_checkpoint.py b/examples/redrafter/convert_checkpoint.py index f5bffd0873..4c18ca612e 100644 --- a/examples/redrafter/convert_checkpoint.py +++ b/examples/redrafter/convert_checkpoint.py @@ -102,7 +102,7 @@ def parse_arguments(): "--output_dir", type=str, default="tllm_checkpoint", - help="The path to save the TensorRT-LLM checkpoint", + help="The path to save the TensorRT LLM checkpoint", ) parser.add_argument( "--workers", diff --git a/examples/summarize.py b/examples/summarize.py index 273c170001..ffa4377f85 100644 --- a/examples/summarize.py +++ b/examples/summarize.py @@ -403,7 +403,7 @@ def main(args): ], dim=0) curr_ppl = ppl(curr_logits, curr_ids) - logger.debug(f"TensorRT-LLM PPL: {curr_ppl:.3f} | " + logger.debug(f"TensorRT LLM PPL: {curr_ppl:.3f} | " f"Generation length: {curr_gen_len}") ppls[batch_idx].append(curr_ppl) return output_beams_list, output_ids_list, ppls, lengths_info @@ -622,7 +622,7 @@ def main(args): if runtime_rank == 0 and args.eval_task != "eval_context_ppl": logger.info( "---------------------------------------------------------") - logger.info("TensorRT-LLM Generated: ") + logger.info("TensorRT LLM Generated: ") logger.info(f" Input: {datapoint[dataset_input_key]}") logger.info(f"\n Reference: {datapoint[dataset_output_key]}") logger.info(f"\n Output: {output}") @@ -683,7 +683,7 @@ def main(args): logger.debug('-' * 100) logger.debug(f"Input: {datapoint[dataset_input_key]}") - logger.debug(f'TensorRT-LLM Output: {output_tensorrt_llm}') + logger.debug(f'TensorRT LLM Output: {output_tensorrt_llm}') logger.debug(f"Reference: {datapoint[dataset_output_key]}") data_point_idx += max_batch_size @@ -807,17 +807,17 @@ def main(args): if test_trt_llm: np.random.seed(0) # rouge score use sampling to compute the score logger.info( - f'TensorRT-LLM (total latency: {profiler.elapsed_time_in_sec("tensorrt_llm")} sec)' + f'TensorRT LLM (total latency: {profiler.elapsed_time_in_sec("tensorrt_llm")} sec)' ) 
logger.info( - f'TensorRT-LLM (total output tokens: {total_output_token_count_trt_llm})' + f'TensorRT LLM (total output tokens: {total_output_token_count_trt_llm})' ) logger.info( - f'TensorRT-LLM (tokens per second: {total_output_token_count_trt_llm / profiler.elapsed_time_in_sec("tensorrt_llm")})' + f'TensorRT LLM (tokens per second: {total_output_token_count_trt_llm / profiler.elapsed_time_in_sec("tensorrt_llm")})' ) for beam_idx in range(num_sequences): - logger.info(f"TensorRT-LLM beam {beam_idx} result") + logger.info(f"TensorRT LLM beam {beam_idx} result") if args.eval_task != "eval_context_ppl": if args.estimate_accuracy_std_dev: computed_metrics_tensorrt_llm = metric_tensorrt_llm[ @@ -923,7 +923,7 @@ if __name__ == '__main__': type=str, default=None, help="Directory where to save output sentences. 'trtllm.out' for " - "TensorRT-LLM outputs, and 'hf.out' for HF outputs. If None, do not " + "TensorRT LLM outputs, and 'hf.out' for HF outputs. If None, do not " "save outputs.") parser.add_argument( '--rouge_dir', diff --git a/scripts/build_wheel.py b/scripts/build_wheel.py index 52abdbcb84..105e648dfc 100755 --- a/scripts/build_wheel.py +++ b/scripts/build_wheel.py @@ -236,9 +236,9 @@ def setup_conan(scripts_dir, venv_python): # Create default profile build_run(f'"{venv_conan}" profile detect -f') - # Add the tensorrt-llm remote if it doesn't exist + # Add the TensorRT LLM remote if it doesn't exist build_run( - f'"{venv_conan}" remote add --force tensorrt-llm https://edge.urm.nvidia.com/artifactory/api/conan/sw-tensorrt-llm-conan', + f'"{venv_conan}" remote add --force TensorRT-LLM https://edge.urm.nvidia.com/artifactory/api/conan/sw-tensorrt-llm-conan', stdout=DEVNULL, stderr=DEVNULL) @@ -481,7 +481,7 @@ def main(*, with working_directory(build_dir): if clean or first_build or configure_cmake: build_run( - f"\"{venv_conan}\" install --build=missing --remote=tensorrt-llm --output-folder={build_dir}/conan -s 'build_type={build_type}' {source_dir}" + f"\"{venv_conan}\" install --build=missing --remote=TensorRT-LLM --output-folder={build_dir}/conan -s 'build_type={build_type}' {source_dir}" ) cmake_def_args.append( f"-DCMAKE_TOOLCHAIN_FILE={build_dir}/conan/conan_toolchain.cmake" diff --git a/tensorrt_llm/__init__.py b/tensorrt_llm/__init__.py index f54026a8cb..a9a929853a 100644 --- a/tensorrt_llm/__init__.py +++ b/tensorrt_llm/__init__.py @@ -115,6 +115,6 @@ __all__ = [ _init() -print(f"[TensorRT-LLM] TensorRT-LLM version: {__version__}") +print(f"[TensorRT-LLM] TensorRT LLM version: {__version__}") sys.stdout.flush() diff --git a/tensorrt_llm/_common.py b/tensorrt_llm/_common.py index 6283cd514d..c0d64abb81 100644 --- a/tensorrt_llm/_common.py +++ b/tensorrt_llm/_common.py @@ -54,10 +54,10 @@ def _init(log_level: object = None) -> None: logger.set_level(log_level) if os.getenv("TRT_LLM_NO_LIB_INIT", "0") == "1": - logger.info("Skipping TensorRT-LLM init.") + logger.info("Skipping TensorRT LLM init.") return - logger.info("Starting TensorRT-LLM init.") + logger.info("Starting TensorRT LLM init.") # load plugin lib _load_plugin_lib() @@ -82,7 +82,7 @@ def _init(log_level: object = None) -> None: MpiComm.local_init() - logger.info("TensorRT-LLM inited.") + logger.info("TensorRT LLM inited.") def default_net() -> Network: diff --git a/tensorrt_llm/_torch/autotuner.py b/tensorrt_llm/_torch/autotuner.py index 6b5f8717f0..4a6c426aa9 100644 --- a/tensorrt_llm/_torch/autotuner.py +++ b/tensorrt_llm/_torch/autotuner.py @@ -260,7 +260,7 @@ class AutoTunerStatistics: class AutoTuner: - 
"""AutoTuner for optimizing TensorRT-LLM operations. + """AutoTuner for optimizing TensorRT LLM operations. This class handles automatic performance tuning of tensor operations by profiling different implementations and caching the best performing configurations. diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py index bcd006be71..5cc93e38d9 100644 --- a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py +++ b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py @@ -109,8 +109,8 @@ class _ExecutorMemoryMonitor(): f"{self._bytes_to_gib(sample.free_gpu_memory_bytes_pre):.2f} / {self._bytes_to_gib(sample.free_gpu_memory_bytes_post):.2f}" ) for sample in self._samples), "", - ("Please refer to the TensorRT-LLM documentation for information on how " - "to control the memory usage through TensorRT-LLM configuration options. " + ("Please refer to the TensorRT LLM documentation for information on how " + "to control the memory usage through TensorRT LLM configuration options. " "Possible options include:"), *(f" {stage.value}: {tuning_knobs[stage]}" for stage in chain((sample.creation_stage diff --git a/tensorrt_llm/_utils.py b/tensorrt_llm/_utils.py index d6cce43776..0ed224a68d 100644 --- a/tensorrt_llm/_utils.py +++ b/tensorrt_llm/_utils.py @@ -1104,7 +1104,7 @@ def is_multi_device_enable(): This method evaluates if we are running on multiple GPUs and the flag ENABLE_MULTI_DEVICE is set. So we can avoid broadcast calls on single GPU. Issue: https://github.com/NVIDIA/TensorRT-LLM/issues/5927 - ENABLE_MULTI_DEVICE is true by default when building tensorrt-llm so we need to also check + ENABLE_MULTI_DEVICE is true by default when building TensorRT LLM so we need to also check the number of devices """ return local_mpi_size() > 1 diff --git a/tensorrt_llm/bench/benchmark/low_latency.py b/tensorrt_llm/bench/benchmark/low_latency.py index af86fb2b1e..455f74a64b 100644 --- a/tensorrt_llm/bench/benchmark/low_latency.py +++ b/tensorrt_llm/bench/benchmark/low_latency.py @@ -36,7 +36,7 @@ from tensorrt_llm.sampling_params import SamplingParams @click.command(name="latency") @optgroup.group("Engine run configuration", - help="Runtime settings for executing a TensorRT-LLM engine.") + help="Runtime settings for executing a TensorRT LLM engine.") @optgroup.option( "--engine_dir", type=click.Path(exists=True, @@ -137,7 +137,7 @@ from tensorrt_llm.sampling_params import SamplingParams "Desired concurrency rate (number of requests processing at the same time), <=0 for no concurrency limit.", ) @optgroup.group("Speculative Decode Options", - help="Runtime settings for executing a TensorRT-LLM engine.") + help="Runtime settings for executing a TensorRT LLM engine.") @optgroup.option( "--medusa_choices", type=click.Path(exists=True, diff --git a/tensorrt_llm/bench/benchmark/throughput.py b/tensorrt_llm/bench/benchmark/throughput.py index a353b2a883..654f821dd9 100755 --- a/tensorrt_llm/bench/benchmark/throughput.py +++ b/tensorrt_llm/bench/benchmark/throughput.py @@ -35,7 +35,7 @@ from tensorrt_llm.sampling_params import SamplingParams @click.command(name="throughput") @optgroup.group("Engine run configuration.", - help="Runtime settings for executing a TensorRT-LLM engine.") + help="Runtime settings for executing a TensorRT LLM engine.") @optgroup.option( "--engine_dir", type=click.Path(exists=True, diff --git a/tensorrt_llm/bench/build/build.py b/tensorrt_llm/bench/build/build.py index 904c7985ec..4de393a5ec 100644 --- 
a/tensorrt_llm/bench/build/build.py +++ b/tensorrt_llm/bench/build/build.py @@ -131,7 +131,7 @@ def apply_build_mode_settings(params): @click.command(name="build") @optgroup.group("Engine Configuration", - help="Configuration of the TensorRT-LLM engine.") + help="Configuration of the TensorRT LLM engine.") @optgroup.option( "--tp_size", "-tp", diff --git a/tensorrt_llm/bench/dataclasses/reporting.py b/tensorrt_llm/bench/dataclasses/reporting.py index a4154ee43c..4812ae02f8 100755 --- a/tensorrt_llm/bench/dataclasses/reporting.py +++ b/tensorrt_llm/bench/dataclasses/reporting.py @@ -493,7 +493,7 @@ class ReportUtility: f"Model:\t\t\t{engine['model']}\n" f"Model Path:\t\t{engine['model_path']}\n" f"Engine Directory:\t{engine['engine_dir']}\n" - f"TensorRT-LLM Version:\t{engine['version']}\n" + f"TensorRT LLM Version:\t{engine['version']}\n" f"Dtype:\t\t\t{pretrain_cfg['dtype']}\n" f"KV Cache Dtype:\t\t{pretrain_cfg['quantization']['kv_cache_quant_algo']}\n" f"Quantization:\t\t{pretrain_cfg['quantization']['quant_algo']}\n" @@ -507,7 +507,7 @@ class ReportUtility: "===========================================================\n" f"Model:\t\t\t{engine['model']}\n" f"Model Path:\t\t{engine['model_path']}\n" - f"TensorRT-LLM Version:\t{engine['version']}\n" + f"TensorRT LLM Version:\t{engine['version']}\n" f"Dtype:\t\t\t{engine['dtype']}\n" f"KV Cache Dtype:\t\t{engine['kv_cache_dtype']}\n" f"Quantization:\t\t{engine['quantization']}\n" diff --git a/tensorrt_llm/builder.py b/tensorrt_llm/builder.py index 272a865d88..85cba5be90 100644 --- a/tensorrt_llm/builder.py +++ b/tensorrt_llm/builder.py @@ -303,7 +303,7 @@ class Builder(): builder_config) -> bool: ''' For each profile, validate that the named dimensions of different input tensors in this profile all have same range. - TRT will validate the same condition, validate it earlier to make sure the modeling in TensorRT-LLM are correct and + TRT will validate the same condition, validate it earlier to make sure the modeling in TensorRT LLM are correct and makes the error msg more user friendly. ''' valid = True @@ -479,9 +479,9 @@ class Builder(): @dataclass class BuildConfig: - """Configuration class for TensorRT-LLM engine building parameters. + """Configuration class for TensorRT LLM engine building parameters. - This class contains all the configuration parameters needed to build a TensorRT-LLM engine, + This class contains all the configuration parameters needed to build a TensorRT LLM engine, including sequence length limits, batch sizes, optimization settings, and various features. Args: @@ -509,7 +509,7 @@ class BuildConfig: auto_parallel_config (AutoParallelConfig): Configuration for automatic parallelization. Defaults to default AutoParallelConfig. weight_sparsity (bool): Whether to enable weight sparsity optimization. Defaults to False. weight_streaming (bool): Whether to enable weight streaming for large models. Defaults to False. - plugin_config (PluginConfig): Configuration for TensorRT-LLM plugins. Defaults to default PluginConfig. + plugin_config (PluginConfig): Configuration for TensorRT LLM plugins. Defaults to default PluginConfig. use_strip_plan (bool): Whether to use stripped plan for engine building. Defaults to False. max_encoder_input_len (int): Maximum encoder input length for encoder-decoder models. Defaults to 1024. dry_run (bool): Whether to perform a dry run without actually building the engine. Defaults to False. 
diff --git a/tensorrt_llm/commands/build.py b/tensorrt_llm/commands/build.py index 9374883a9c..75630e8eb9 100644 --- a/tensorrt_llm/commands/build.py +++ b/tensorrt_llm/commands/build.py @@ -62,26 +62,26 @@ def parse_arguments(): '--checkpoint_dir', type=str, default=None, - help="The directory path that contains TensorRT-LLM checkpoint.") + help="The directory path that contains TensorRT LLM checkpoint.") parser.add_argument( '--model_config', type=str, default=None, - help="The file path that saves TensorRT-LLM checkpoint config.") + help="The file path that saves TensorRT LLM checkpoint config.") parser.add_argument( '--build_config', type=str, default=None, - help="The file path that saves TensorRT-LLM build config.") + help="The file path that saves TensorRT LLM build config.") parser.add_argument( '--model_cls_file', type=str, default=None, - help="The file path that defines customized TensorRT-LLM model.") + help="The file path that defines customized TensorRT LLM model.") parser.add_argument('--model_cls_name', type=str, default=None, - help="The customized TensorRT-LLM model class name.") + help="The customized TensorRT LLM model class name.") parser.add_argument( '--output_dir', type=str, diff --git a/tensorrt_llm/functional.py b/tensorrt_llm/functional.py index 06880bc430..6bc0c691d7 100755 --- a/tensorrt_llm/functional.py +++ b/tensorrt_llm/functional.py @@ -590,7 +590,7 @@ class Tensor(object): return id(None) def __repr__(self): - return f"TensorRT-LLM Tensor: {self.name=} {self.dtype=} {self.shape=}" + return f"TensorRT LLM Tensor: {self.name=} {self.dtype=} {self.shape=}" def __xor__(self, b): ''' @@ -604,7 +604,7 @@ class Tensor(object): def _create_tensor(trt_tensor: trt.ITensor, producer: trt.ILayer) -> Tensor: ''' - A helper function to create a TensorRT-LLM Tensor object that encapsulates + A helper function to create a TensorRT LLM Tensor object that encapsulates the connection between the TensorRT tensor (trt.ITensor) and the layer (trt.ILayer) that produces it. @@ -626,7 +626,7 @@ def _create_tensor(trt_tensor: trt.ITensor, producer: trt.ILayer) -> Tensor: The producer. Returns: - The TensorRT-LLM tensor (functional.Tensor) that encapsulates the + The TensorRT LLM tensor (functional.Tensor) that encapsulates the TensorRT tensor and the layer that produces it. The former is accessible through the attribute 'trt_tensor' and the latter using the attribute 'producer'. @@ -2051,8 +2051,8 @@ def expand_dims_like(left: Union[Tensor, int, float], right: Tensor) -> Tensor: return left -# If dim is None, return a 1-D TensorRT-LLM tensor of the size -# If dim is not None, return a 0-D TensorRT-LLM tensor of the dimension size +# If dim is None, return a 1-D TensorRT LLM tensor of the size +# If dim is not None, return a 0-D TensorRT LLM tensor of the dimension size def shape(input: Tensor, dim: Optional[int] = None, cast_to_dtype: Optional[Union[str, trt.DataType]] = None, @@ -3471,7 +3471,7 @@ def softplus(input: Tensor, beta: float, threshold: float) -> Tensor: Parameters: input : Tensor - Input TensorRT-LLM Tensor. + Input TensorRT LLM Tensor. beta : float The parameter for softplus computation. threshold : float diff --git a/tensorrt_llm/llmapi/llm.py b/tensorrt_llm/llmapi/llm.py index 4f2fbfb015..e9ac3c1e14 100644 --- a/tensorrt_llm/llmapi/llm.py +++ b/tensorrt_llm/llmapi/llm.py @@ -732,7 +732,7 @@ class BaseLLM: @append_docstring(TRT_LLM_DOCSTRING) class _TrtLLM(BaseLLM): - """LLM class is the main class for running a LLM model using TensorRT-LLM backend. 
+ """LLM class is the main class for running a LLM model using TensorRT LLM backend. Parameters: """ diff --git a/tensorrt_llm/llmapi/llm_utils.py b/tensorrt_llm/llmapi/llm_utils.py index ea20f023f4..3989ba78ea 100644 --- a/tensorrt_llm/llmapi/llm_utils.py +++ b/tensorrt_llm/llmapi/llm_utils.py @@ -152,7 +152,7 @@ class ModelLoader: if isinstance(self.llm_args.model, Module): # Build engine from user provided model self._build_pipeline.append( - ("Build TensorRT-LLM engine", + ("Build TensorRT LLM engine", self._build_engine_from_inmemory_model)) return diff --git a/tensorrt_llm/lora_manager.py b/tensorrt_llm/lora_manager.py index 788ffdd2ca..029a66a63a 100644 --- a/tensorrt_llm/lora_manager.py +++ b/tensorrt_llm/lora_manager.py @@ -205,14 +205,14 @@ def get_hf_target_modules(lora_weights, hf_modules): def invert_module_mapping( trtllm_modules_to_hf_modules: Dict[str, Union[str, List[str]]], ) -> Dict[str, str]: - """Invert module mapping from TensorRT-LLM -> HF to HF -> TensorRT-LLM. + """Invert module mapping from TensorRT LLM -> HF to HF -> TensorRT-LLM. Args: - trtllm_modules_to_hf_modules: Mapping from TensorRT-LLM module names to HF module names + trtllm_modules_to_hf_modules: Mapping from TensorRT LLM module names to HF module names (values can be strings or lists of strings) Returns: - Dictionary mapping HF module names to TensorRT-LLM module names + Dictionary mapping HF module names to TensorRT LLM module names """ hf_modules_to_trtllm_modules: Dict[str, str] = {} for k, hf_modules in trtllm_modules_to_hf_modules.items(): diff --git a/tensorrt_llm/models/eagle/model.py b/tensorrt_llm/models/eagle/model.py index 07a9d97843..e6edc7c676 100644 --- a/tensorrt_llm/models/eagle/model.py +++ b/tensorrt_llm/models/eagle/model.py @@ -736,7 +736,7 @@ class EagleForCausalLM(LLaMAForCausalLM): I|1|0|1|0 J|0|1|0|1 Note that we could've stored FG in KV cache and provide only IJ tokens here - with mask for past KV cache, but it is not supported in TensorRT-LLM attention at the moment. + with mask for past KV cache, but it is not supported in TensorRT LLM attention at the moment. Draft2 produces tokens K and L at positions 6 and 7. 7. Resulting outputs are: diff --git a/tensorrt_llm/models/mmdit_sd3/model.py b/tensorrt_llm/models/mmdit_sd3/model.py index 480119bc36..546abbeade 100644 --- a/tensorrt_llm/models/mmdit_sd3/model.py +++ b/tensorrt_llm/models/mmdit_sd3/model.py @@ -599,7 +599,7 @@ class SD3ModelWeightsLoader(ModelWeightsLoader): def translate_to_external_key(self, tllm_key: str, tllm_to_externel_key_dict: dict): - """Convert and load external checkpoint into a TensorRT-LLM model. + """Convert and load external checkpoint into a TensorRT LLM model. """ trtllm_to_hf_name = { r"transformer_blocks.(\d+).ff(\w*).net.1.weight": diff --git a/tensorrt_llm/models/model_weights_loader.py b/tensorrt_llm/models/model_weights_loader.py index 6cbb8993fd..ab05d8565b 100644 --- a/tensorrt_llm/models/model_weights_loader.py +++ b/tensorrt_llm/models/model_weights_loader.py @@ -26,7 +26,7 @@ class ModelWeightsFormat(Enum): class ModelWeightsLoader: - """Convert and load external checkpoint into a TensorRT-LLM model. + """Convert and load external checkpoint into a TensorRT LLM model. Attributes: model_dir : Model directory or in-memory torch model. 
diff --git a/tensorrt_llm/models/modeling_utils.py b/tensorrt_llm/models/modeling_utils.py index 7b2af7af15..a491d172f3 100644 --- a/tensorrt_llm/models/modeling_utils.py +++ b/tensorrt_llm/models/modeling_utils.py @@ -1956,7 +1956,7 @@ def save_config(config: PretrainedConfig, *, output_dir: str, log: bool) -> None: config_path = Path(output_dir) / "config.json" if log: - logger.debug(f"Saving TensorRT-LLM configuration to {config_path}") + logger.debug(f"Saving TensorRT LLM configuration to {config_path}") config_path.parent.mkdir(exist_ok=True, parents=True) config_path.write_text(json.dumps(config.to_dict(), indent=4)) diff --git a/tensorrt_llm/models/qwen/model.py b/tensorrt_llm/models/qwen/model.py index 0eb6e8ac44..7fc8800d14 100644 --- a/tensorrt_llm/models/qwen/model.py +++ b/tensorrt_llm/models/qwen/model.py @@ -478,7 +478,7 @@ class QWenForCausalLM(DecoderModelForCausalLM): logger.debug(f"HuggingFace model: {hf_model}") model = QWenForCausalLM(config) - logger.debug(f"TensorRT-LLM model: {model}") + logger.debug(f"TensorRT LLM model: {model}") if quant_config.quant_algo == QuantAlgo.W4A16_GPTQ: weights = load_weights_from_hf_gptq_model(hf_model, config) diff --git a/tensorrt_llm/models/stdit/model.py b/tensorrt_llm/models/stdit/model.py index 780f5d0790..7e2cc5bdce 100644 --- a/tensorrt_llm/models/stdit/model.py +++ b/tensorrt_llm/models/stdit/model.py @@ -1482,7 +1482,7 @@ class STDiT3ModelWeightsLoader(ModelWeightsLoader): def translate_to_external_key(self, tllm_key: str, tllm_to_externel_key_dict: dict): - """Convert and load external checkpoint into a TensorRT-LLM model. + """Convert and load external checkpoint into a TensorRT LLM model. """ trtllm_to_hf_name = { r"spatial_blocks.(\d+).attn.q_layernorm.weight": diff --git a/tensorrt_llm/models/unet/embeddings.py b/tensorrt_llm/models/unet/embeddings.py index 79ad6ae841..8bfe16a408 100644 --- a/tensorrt_llm/models/unet/embeddings.py +++ b/tensorrt_llm/models/unet/embeddings.py @@ -66,7 +66,7 @@ def get_timestep_embedding(timesteps, else: emb = concat([sin(emb), cos(emb)], dim=1) - #TODO Enable below logic when TensorRT-LLM supports pad feature. + #TODO Enable below logic when TensorRT LLM supports pad feature. # zero pad # if embedding_dim % 2 == 1: # emb = torch.nn.functional.pad(emb, (0, 1, 0, 0)) diff --git a/tensorrt_llm/module.py b/tensorrt_llm/module.py index 62d0a1de5d..c67674ec15 100644 --- a/tensorrt_llm/module.py +++ b/tensorrt_llm/module.py @@ -202,7 +202,7 @@ class Module(object): tm = {k: v for k, v in torch_module.named_parameters()} assert sorted(m.keys()) == sorted(tm.keys()), ( - "The parameter names of the tensorrt-llm module must be the same with the torch module" + "The parameter names of the TensorRT LLM module must be the same as the torch module" ) for k, v in self.named_parameters(): diff --git a/tensorrt_llm/plugin/plugin.py b/tensorrt_llm/plugin/plugin.py index 1c4451221f..d2267d51c9 100644 --- a/tensorrt_llm/plugin/plugin.py +++ b/tensorrt_llm/plugin/plugin.py @@ -53,7 +53,7 @@ def _load_plugin_lib(): handle.initTrtLlmPlugins.argtypes = [ctypes.c_void_p, ctypes.c_char_p] handle.initTrtLlmPlugins.restype = ctypes.c_bool except AttributeError as err: - raise ImportError('TensorRT-LLM Plugin is unavailable') from err + raise ImportError('TensorRT LLM Plugin is unavailable') from err try: assert handle.initTrtLlmPlugins( @@ -422,7 +422,7 @@ class PluginConfig(metaclass=PluginConfigMeta): init=False, metadata={ "help": - "Enable TensorRT-LLM managed weights to speed up engine building process."
+ "Enable TensorRT LLM managed weights to speed up engine building process." }) _use_fused_mlp: bool = field( default=True, diff --git a/tensorrt_llm/serve/scripts/benchmark_dataset.py b/tensorrt_llm/serve/scripts/benchmark_dataset.py index 35d2744aea..7b0a093b26 100644 --- a/tensorrt_llm/serve/scripts/benchmark_dataset.py +++ b/tensorrt_llm/serve/scripts/benchmark_dataset.py @@ -326,7 +326,7 @@ class RandomDataset(BenchmarkDataset): class CustomDataset(BenchmarkDataset): """ - TensorRT-LLM customized dataset implementation. + TensorRT LLM customized dataset implementation. It assumes the dataset to be consist of several lines of json, each line is a minimal OpenAI API format request. Example format of each sample on each line: { diff --git a/tensorrt_llm/serve/scripts/benchmark_serving.py b/tensorrt_llm/serve/scripts/benchmark_serving.py index 303688f001..459c463179 100644 --- a/tensorrt_llm/serve/scripts/benchmark_serving.py +++ b/tensorrt_llm/serve/scripts/benchmark_serving.py @@ -4,7 +4,7 @@ r"""Benchmark online serving throughput. On the server side, run one of the following commands: - TensorRT-LLM OpenAI API server + TensorRT LLM OpenAI API server trtllm-serve On the client side, run: diff --git a/tensorrt_llm/tools/plugin_gen/templates/functional.py.tpl b/tensorrt_llm/tools/plugin_gen/templates/functional.py.tpl index 281da94730..be67eb117a 100644 --- a/tensorrt_llm/tools/plugin_gen/templates/functional.py.tpl +++ b/tensorrt_llm/tools/plugin_gen/templates/functional.py.tpl @@ -21,7 +21,7 @@ def _load_triton_plugin_lib(): plugin_lib = "[[ plugin_lib_path ]]" handle = ctypes.CDLL(plugin_lib, mode=ctypes.RTLD_GLOBAL) if handle is None: - raise ImportError('TensorRT-LLM Triton Plugin is unavailable') + raise ImportError('TensorRT LLM Triton Plugin is unavailable') handle.initLibNvInferPlugins.argtypes = [ctypes.c_void_p, ctypes.c_char_p] handle.initLibNvInferPlugins.restype = ctypes.c_bool assert handle.initLibNvInferPlugins( diff --git a/tests/integration/defs/accuracy/accuracy_core.py b/tests/integration/defs/accuracy/accuracy_core.py index e135ddfa01..85bec29c8c 100644 --- a/tests/integration/defs/accuracy/accuracy_core.py +++ b/tests/integration/defs/accuracy/accuracy_core.py @@ -423,7 +423,7 @@ class CliFlowAccuracyTestHarness: self.env = env def convert(self): - print("Converting model to TensorRT-LLM checkpoint...") + print("Converting model to TensorRT LLM checkpoint...") is_prequantized = False for quant_config_file in [ diff --git a/tests/integration/defs/common.py b/tests/integration/defs/common.py index a61a5b8c28..809fe42f03 100644 --- a/tests/integration/defs/common.py +++ b/tests/integration/defs/common.py @@ -647,7 +647,7 @@ def get_trt_llm_lib_dir(venv): "import tensorrt_llm; print(f'{tensorrt_llm.__path__[0]}/libs')", caller=check_output).strip() - if "TensorRT-LLM version: " in output: + if "TensorRT LLM version: " in output: output = output.split('\n')[-1] return output.strip() diff --git a/tests/integration/defs/examples/test_gemma.py b/tests/integration/defs/examples/test_gemma.py index c0a6cbceaf..c04ea61806 100644 --- a/tests/integration/defs/examples/test_gemma.py +++ b/tests/integration/defs/examples/test_gemma.py @@ -253,7 +253,7 @@ def gemma_1gpu_summary(batch_size, "run gemm test on 1 gpu" skip_fp8_pre_ada(use_fp8=test_case == "fp8_kv_cache") if "smooth_quant" in test_case and "bfloat16" in data_type: - pytest.skip("TensorRT-LLM does not support SmoothQuant with bfloat16.") + pytest.skip("TensorRT LLM does not support SmoothQuant with bfloat16.") if 
any(params in gemma_model_root for params in ["gemma-7b", "9b", "27b"]) and get_device_memory() < 50000: @@ -349,7 +349,7 @@ def test_llm_gemma_1gpu_mmlu(batch_size, data_type, gemma_model_root, llm_venv, llm_rouge_root, llm_datasets_root, test_case): "run gemm test on 1 gpu" if "smooth_quant" in test_case and "bfloat16" in data_type: - pytest.skip("TensorRT-LLM does not support SmoothQuant with bfloat16.") + pytest.skip("TensorRT LLM does not support SmoothQuant with bfloat16.") ckpt_type = get_ckpt_type(gemma_model_root) ckpt_dir = get_ckpt_dir(gemma_model_root) vocab_file = get_vocab_file(gemma_model_root) diff --git a/tests/integration/defs/perf/build.py b/tests/integration/defs/perf/build.py index d01d007c54..e4d4ca2101 100644 --- a/tests/integration/defs/perf/build.py +++ b/tests/integration/defs/perf/build.py @@ -44,7 +44,7 @@ WEIGHT_STREAMING_DISABLED_VAL = "1.0" def parse_arguments(): - parser = argparse.ArgumentParser(description='Build TensorRT-LLM models.') + parser = argparse.ArgumentParser(description='Build TensorRT LLM models.') parser.add_argument('-m', '--model', type=str, diff --git a/tests/integration/defs/stress_test/stress_test.py b/tests/integration/defs/stress_test/stress_test.py index 03456d8d5c..50f3592b47 100644 --- a/tests/integration/defs/stress_test/stress_test.py +++ b/tests/integration/defs/stress_test/stress_test.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -Stress test script for inference of model using TensorRT-LLM with PyTorch/TRT backend. +Stress test script for model inference using TensorRT LLM with PyTorch/TRT backend. This script is used for stress testing inference performance using trtllm-serve and genai-perf. """ import contextlib diff --git a/tests/integration/defs/utils/__init__.py b/tests/integration/defs/utils/__init__.py index 4b60d0c485..059725c3cc 100644 --- a/tests/integration/defs/utils/__init__.py +++ b/tests/integration/defs/utils/__init__.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -Utility modules for TensorRT-LLM integration tests. +Utility modules for TensorRT LLM integration tests. This package provides various utilities to simplify test development and reduce boilerplate code.
diff --git a/tests/unittest/_torch/thop/test_fused_qk_norm_rope.py b/tests/unittest/_torch/thop/test_fused_qk_norm_rope.py index ad76e9705e..437e9de178 100644 --- a/tests/unittest/_torch/thop/test_fused_qk_norm_rope.py +++ b/tests/unittest/_torch/thop/test_fused_qk_norm_rope.py @@ -148,7 +148,7 @@ def test_fused_qk_norm_rope(head_dim, num_heads_group, num_tokens, is_neox, k_weight, base, is_neox, position_ids) output = qkv # This op is inplace - # Compute reference output using TensorRT-LLM modules + # Compute reference output using TensorRT LLM modules ref_output = torch_ref_rms_norm_rope(qkv_copy, num_heads_q, num_heads_k, num_heads_v, head_dim, eps, q_weight, k_weight, base, is_neox, position_ids) diff --git a/tests/unittest/others/test_plugins.py b/tests/unittest/others/test_plugins.py index 6abc69f96a..842d3ad0f3 100644 --- a/tests/unittest/others/test_plugins.py +++ b/tests/unittest/others/test_plugins.py @@ -7,7 +7,7 @@ import tensorrt_llm.plugin as _tlp def test_load_library(): - """Test loading the TensorRT-LLM plugin library.""" + """Test loading the TensorRT LLM plugin library.""" runtime = _trt.Runtime(_trt.Logger(_trt.Logger.WARNING)) _trt.init_libnvinfer_plugins(runtime.logger, namespace=_tlp.TRT_LLM_PLUGIN_NAMESPACE) diff --git a/tests/unittest/tools/test_prepare_dataset.py b/tests/unittest/tools/test_prepare_dataset.py index d34c337e0b..05da19a595 100644 --- a/tests/unittest/tools/test_prepare_dataset.py +++ b/tests/unittest/tools/test_prepare_dataset.py @@ -54,7 +54,7 @@ class TestPrepareDatasetLora: Build the base command for running prepare_dataset.py. Args: - llm_root: Path to the TensorRT-LLM root directory + llm_root: Path to the TensorRT LLM root directory Returns: List[str]: Base command components @@ -116,7 +116,7 @@ class TestPrepareDatasetLora: output. 
Args: - llm_root: Path to the TensorRT-LLM root directory + llm_root: Path to the TensorRT LLM root directory **kwargs: Keyword arguments for LoRA configuration Returns: diff --git a/tests/unittest/trt/functional/test_gemm_swiglu.py b/tests/unittest/trt/functional/test_gemm_swiglu.py index d264d97093..984c7387cf 100644 --- a/tests/unittest/trt/functional/test_gemm_swiglu.py +++ b/tests/unittest/trt/functional/test_gemm_swiglu.py @@ -82,11 +82,11 @@ class TestGemmSwiglu(unittest.TestCase): net.plugin_config.gemm_swiglu_plugin = dtype with tensorrt_llm.net_guard(net): - # Init TensorRT-LLM tensor for x + # Init TensorRT LLM tensor for x x_tensor = Tensor(name='x', shape=x.shape, dtype=str_dtype_to_trt(dtype)) - # Init TensorRT-LLM tensor for w + # Init TensorRT LLM tensor for w w_tensor = Tensor(name='w', shape=w.shape, dtype=str_dtype_to_trt(dtype)) diff --git a/tests/unittest/trt/functional/test_low_latency_gemm.py b/tests/unittest/trt/functional/test_low_latency_gemm.py index d662fb9213..457d73d3c3 100644 --- a/tests/unittest/trt/functional/test_low_latency_gemm.py +++ b/tests/unittest/trt/functional/test_low_latency_gemm.py @@ -56,11 +56,11 @@ class TestLowLatencyGemm(unittest.TestCase): net = builder.create_network() net.plugin_config.low_latency_gemm_plugin = "fp8" with tensorrt_llm.net_guard(net): - # Init TensorRT-LLM tensor for x + # Init TensorRT LLM tensor for x x_tensor = Tensor(name='x', shape=x.shape, dtype=str_dtype_to_trt('fp8')) - # Init TensorRT-LLM tensor for w + # Init TensorRT LLM tensor for w w_tensor = Tensor(name='w', shape=w.shape, dtype=str_dtype_to_trt('fp8')) diff --git a/tests/unittest/trt/quantization/test_fp8_rowwise_gemm.py b/tests/unittest/trt/quantization/test_fp8_rowwise_gemm.py index 1946e42b26..99c7b94da5 100644 --- a/tests/unittest/trt/quantization/test_fp8_rowwise_gemm.py +++ b/tests/unittest/trt/quantization/test_fp8_rowwise_gemm.py @@ -65,20 +65,20 @@ class TestFp8RowwiseGemm(unittest.TestCase): # Allow fp8_rowwise_gemm_plugin of dtype type network.plugin_config.fp8_rowwise_gemm_plugin = dtype with tensorrt_llm.net_guard(network): - # Init TensorRT-LLM tensor for mat1 + # Init TensorRT LLM tensor for mat1 x = Tensor(name='x', shape=mat1.shape, dtype=tensorrt_llm._utils.str_dtype_to_trt("fp8")) - # Init TensorRT-LLM tensor for mat2 + # Init TensorRT LLM tensor for mat2 y = Tensor(name='y', shape=mat2.shape, dtype=tensorrt_llm._utils.str_dtype_to_trt("fp8")) - # Init TensorRT-LLM tensor for per token scaling + # Init TensorRT LLM tensor for per token scaling scale_a = Tensor( name='scale_a', shape=scale_a_torch.shape, dtype=tensorrt_llm._utils.str_dtype_to_trt("float32")) - # Init TensorRT-LLM tensor for per channel scaling + # Init TensorRT LLM tensor for per channel scaling scale_b = Tensor( name='scale_b', shape=scale_b_torch.shape, @@ -97,7 +97,7 @@ class TestFp8RowwiseGemm(unittest.TestCase): memory_pool_limits={trt.MemoryPoolType.WORKSPACE: 33554432})) assert engine is not None, "Failed to build engine" - # Create TensorRT-LLM session + # Create TensorRT LLM session session = tensorrt_llm.runtime.Session.from_serialized_engine( engine.serialize()) diff --git a/tests/unittest/trt/quantization/test_smooth_quant_gemm.py b/tests/unittest/trt/quantization/test_smooth_quant_gemm.py index fe31c2a6a1..a3f5781cbe 100644 --- a/tests/unittest/trt/quantization/test_smooth_quant_gemm.py +++ b/tests/unittest/trt/quantization/test_smooth_quant_gemm.py @@ -64,17 +64,17 @@ class TestSmoothQuantGemm(unittest.TestCase): if use_plugin: 
network.plugin_config.smooth_quant_gemm_plugin = dtype with tensorrt_llm.net_guard(network): - # Init TensorRT-LLM tensor for mat1 + # Init TensorRT LLM tensor for mat1 x = Tensor(name='x', shape=mat1.shape, dtype=tensorrt_llm._utils.str_dtype_to_trt("int8")) - # Init TensorRT-LLM tensor for mat2 + # Init TensorRT LLM tensor for mat2 y = Tensor(name='y', shape=mat2.shape, dtype=tensorrt_llm._utils.str_dtype_to_trt("int8")) - # Init TensorRT-LLM tensor for per token scaling + # Init TensorRT LLM tensor for per token scaling scale_a = tensorrt_llm.functional.constant(scale_a_torch.numpy()) - # Init TensorRT-LLM tensor for per channel scaling + # Init TensorRT LLM tensor for per channel scaling scale_b = tensorrt_llm.functional.constant(scale_b_torch.numpy()) # Get output tensor for SQ gemm output = smooth_quant_gemm(x, y, scale_a, scale_b, diff --git a/tests/unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py b/tests/unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py index f0d3549044..7a97e1ba7a 100644 --- a/tests/unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py +++ b/tests/unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py @@ -55,39 +55,39 @@ class TestWeightOnlyGroupWiseQuantMatmul(unittest.TestCase): network = builder.create_network() network.plugin_config.weight_only_groupwise_quant_matmul_plugin = dtype with tensorrt_llm.net_guard(network): - # Init TensorRT-LLM tensor for activation + # Init TensorRT LLM tensor for activation activation = Tensor( name='activation', shape=th_activation.shape, dtype=tensorrt_llm._utils.str_dtype_to_trt(dtype)) - # Init TensorRT-LLM tensor for pre_quant_scale + # Init TensorRT LLM tensor for pre_quant_scale pre_quant_scale = Tensor( name='pre_quant_scale', shape=th_pre_quant_scale.shape, dtype=tensorrt_llm._utils.str_dtype_to_trt(dtype)) - # Init TensorRT-LLM tensor for weight + # Init TensorRT LLM tensor for weight weight = Tensor(name='weight', shape=th_weight.shape, dtype=tensorrt_llm._utils.str_dtype_to_trt(dtype)) - # Init TensorRT-LLM tensor for scale + # Init TensorRT LLM tensor for scale scale = Tensor(name='scale', shape=th_scale.shape, dtype=tensorrt_llm._utils.str_dtype_to_trt(dtype)) - # Init TensorRT-LLM tensor for zero + # Init TensorRT LLM tensor for zero if th_zero is not None: zero = Tensor(name='zero', shape=th_zero.shape, dtype=tensorrt_llm._utils.str_dtype_to_trt(dtype)) else: zero = None - # Init TensorRT-LLM tensor for bias + # Init TensorRT LLM tensor for bias if th_bias is not None: bias = Tensor(name='bias', shape=th_bias.shape, dtype=tensorrt_llm._utils.str_dtype_to_trt(dtype)) else: bias = None - # Init TensorRT-LLM tensor for alpha + # Init TensorRT LLM tensor for alpha if th_alpha is not None: alpha = Parameter(th_alpha.cpu().numpy(), shape=th_alpha.shape, diff --git a/tests/unittest/trt/quantization/test_weight_only_quant_matmul.py b/tests/unittest/trt/quantization/test_weight_only_quant_matmul.py index 1dae1b405d..3d9623fb60 100644 --- a/tests/unittest/trt/quantization/test_weight_only_quant_matmul.py +++ b/tests/unittest/trt/quantization/test_weight_only_quant_matmul.py @@ -55,13 +55,13 @@ class TestWeightOnlyQuantMatmul(unittest.TestCase): if use_plugin: network.plugin_config.weight_only_quant_matmul_plugin = dtype with tensorrt_llm.net_guard(network): - # Init TensorRT-LLM tensor for mat1 + # Init TensorRT LLM tensor for mat1 x = Tensor(name='x', shape=mat1.shape, dtype=tensorrt_llm._utils.str_dtype_to_trt(dtype)) - # Init TensorRT-LLM tensor for 
weight + # Init TensorRT LLM tensor for weight weights = constant(torch_to_numpy(processed_torch_weights)) - # Init TensorRT-LLM tensor for per channel scaling + # Init TensorRT LLM tensor for per channel scaling scale = constant(torch_to_numpy(torch_weight_scales)) # Get output tensor for WOQ Matmul output = weight_only_quant_matmul(x, diff --git a/triton_backend/tools/inflight_batcher_llm/benchmark_core_model.py b/triton_backend/tools/inflight_batcher_llm/benchmark_core_model.py index 8bd0329500..f79b6954ea 100644 --- a/triton_backend/tools/inflight_batcher_llm/benchmark_core_model.py +++ b/triton_backend/tools/inflight_batcher_llm/benchmark_core_model.py @@ -316,7 +316,7 @@ if __name__ == '__main__': default=["tensorrt_llm"], action="append", help= - "Specify the name of the TensorRT-LLM model. Can be specified multiple times to use multiple models." + "Specify the name of the TensorRT LLM model. Can be specified multiple times to use multiple models." ) parser.add_argument('-c', '--concurrency',