infra: move nvrtc_wrapper to conan (#3282)

* add pip scripts dir to path
* move nvrtc_wrapper to conan
* support building nvrtc wrapper from source

---------

Signed-off-by: Tyler Burt <195370667+tburt-nv@users.noreply.github.com>
This commit is contained in:
tburt-nv 2025-04-15 05:31:01 +08:00 committed by GitHub
parent 8cf2785bc6
commit c0dd6cbce0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 136 additions and 165 deletions

1
.gitignore vendored
View File

@@ -32,6 +32,7 @@ config.json
/*.svg
cpp/cmake-build-*
cpp/.ccache
cpp/.conan
tensorrt_llm/bin
tensorrt_llm/include
tensorrt_llm/libs

View File

@@ -358,6 +358,8 @@ if(ENABLE_MULTI_DEVICE)
find_library(NCCL_LIB nccl HINTS ${NCCL_LIB_DIR})
endif()
find_package(tensorrt_llm_nvrtc_wrapper REQUIRED)
get_filename_component(TRT_LLM_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} PATH)
set(3RDPARTY_DIR ${TRT_LLM_ROOT_DIR}/3rdparty)
@@ -369,6 +371,7 @@ include_directories(
${CUDAToolkit_INCLUDE_DIRS}
${CUDNN_ROOT_DIR}/include
${NCCL_INCLUDE_DIR}
${tensorrt_llm_nvrtc_wrapper_INCLUDE_DIRS}
${3RDPARTY_DIR}/cutlass/include
${3RDPARTY_DIR}/cutlass/tools/util/include
${3RDPARTY_DIR}/NVTX/include

1
cpp/conandata.yml Normal file
View File

@@ -0,0 +1 @@
tensorrt_llm_nvrtc_wrapper: 1.9c24486cb2cd9dd9582b311b84e1b428d29a735a

24
cpp/conanfile.py Normal file
View File

@@ -0,0 +1,24 @@
from conan import ConanFile
from conan.tools.cmake import CMakeDeps, CMakeToolchain
class TensorRT_LLM(ConanFile):
    """Conan recipe declaring TensorRT-LLM's binary C++ dependencies.

    Pulls in the prebuilt nvrtc wrapper package and exposes the location of
    its shared library to the CMake build through the generated toolchain.
    """

    name = "TensorRT-LLM"
    settings = "os", "arch", "compiler", "build_type"
    # Legacy virtual-env generators are not needed for this build.
    virtualbuildenv = False
    virtualrunenv = False

    def requirements(self):
        # The pinned wrapper version/revision is read from conandata.yml.
        wrapper_version = self.conan_data["tensorrt_llm_nvrtc_wrapper"]
        self.requires(f"tensorrt_llm_nvrtc_wrapper/{wrapper_version}")

    def generate(self):
        # Emit CMake config-mode find_package() files for all requirements.
        CMakeDeps(self).generate()

        # Record the wrapper .so location in the toolchain so the CMake
        # build can copy it into the build tree.
        toolchain = CMakeToolchain(self)
        wrapper_libdir = self.dependencies[
            "tensorrt_llm_nvrtc_wrapper"].cpp_info.libdirs[0]
        toolchain.variables[
            "NVRTC_WRAPPER_LIB_SOURCE_REL_LOC"] = wrapper_libdir + "/libtensorrt_llm_nvrtc_wrapper.so"
        toolchain.generate()

View File

@@ -139,55 +139,13 @@ find_package(Threads REQUIRED)
target_link_libraries(${BATCH_MANAGER_TARGET} INTERFACE Threads::Threads)
target_link_libraries(${EXECUTOR_TARGET} INTERFACE Threads::Threads)
set(NVRTC_WRAPPER_TARGET tensorrt_llm_nvrtc_wrapper)
set(NVRTC_WRAPPER_TARGET_ARCH ${TARGET_ARCH})
if(BUILD_NVRTC_WRAPPER)
add_subdirectory(
kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper)
else()
add_library(${NVRTC_WRAPPER_TARGET} SHARED IMPORTED)
set(NVRTC_WRAPPER_LIB_TARBALL
"${CMAKE_CURRENT_SOURCE_DIR}/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/${NVRTC_WRAPPER_TARGET_ARCH}/${NVRTC_WRAPPER_TARGET}.tar.xz"
)
set(NVRTC_WRAPPER_LIB_BINARY_DIR
"${CMAKE_CURRENT_BINARY_DIR}/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper"
)
if(NOT WIN32) # Linux
set(NVRTC_WRAPPER_LIB_NAME "lib${NVRTC_WRAPPER_TARGET}.so")
else() # Windows
set(NVRTC_WRAPPER_LIB_NAME "${NVRTC_WRAPPER_TARGET}.lib")
set(NVRTC_WRAPPER_DLL_NAME "${NVRTC_WRAPPER_TARGET}.dll")
set(NVRTC_WRAPPER_DLL_PATH
"${NVRTC_WRAPPER_LIB_BINARY_DIR}/${NVRTC_WRAPPER_DLL_NAME}")
endif()
set(NVRTC_WRAPPER_LIB_PATH
"${NVRTC_WRAPPER_LIB_BINARY_DIR}/${NVRTC_WRAPPER_LIB_NAME}")
add_custom_command(
OUTPUT ${NVRTC_WRAPPER_LIB_PATH} ${NVRTC_WRAPPER_DLL_PATH}
COMMAND ${CMAKE_COMMAND} -E make_directory ${NVRTC_WRAPPER_LIB_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E chdir ${NVRTC_WRAPPER_LIB_BINARY_DIR}
${CMAKE_COMMAND} -E tar xf ${NVRTC_WRAPPER_LIB_TARBALL}
DEPENDS ${NVRTC_WRAPPER_LIB_TARBALL}
VERBATIM)
add_custom_target(${NVRTC_WRAPPER_TARGET}_helper
DEPENDS ${NVRTC_WRAPPER_LIB_PATH} ${NVRTC_WRAPPER_DLL_PATH})
add_dependencies(${NVRTC_WRAPPER_TARGET} ${NVRTC_WRAPPER_TARGET}_helper)
set_property(TARGET ${NVRTC_WRAPPER_TARGET}
PROPERTY IMPORTED_LOCATION ${NVRTC_WRAPPER_LIB_PATH})
if(WIN32)
set_property(TARGET ${NVRTC_WRAPPER_TARGET}
PROPERTY IMPORTED_IMPLIB ${NVRTC_WRAPPER_DLL_PATH})
endif()
file(SIZE ${INTERNAL_CUTLASS_KERNELS_LIB_TARBALL} NVRTC_WRAPPER_LIB_SIZE)
if(NVRTC_WRAPPER_LIB_SIZE LESS 1024)
message(
FATAL_ERROR
"The nvrtc wrapper library is truncated or incomplete. This is usually caused by using Git LFS (Large File Storage) incorrectly. Please try running command `git lfs install && git lfs pull`."
)
endif()
endif()
# NVRTC_WRAPPER_LIB_SOURCE_REL_LOC is defined in cpp/conanfile.py
set(NVRTC_WRAPPER_LIB_BINARY_REL_LOC
"kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/libtensorrt_llm_nvrtc_wrapper.so"
)
# Copy the .so to build directory, which is needed in build_wheel.py.
configure_file(${NVRTC_WRAPPER_LIB_SOURCE_REL_LOC}
${NVRTC_WRAPPER_LIB_BINARY_REL_LOC} COPYONLY)
set(TRTLLM_LINK_LIBS
${CUDA_DRV_LIB}
@@ -273,7 +231,9 @@ if(NOT WIN32)
"-Wl,-rpath='$ORIGIN'")
endif()
target_link_libraries(${SHARED_TARGET} PUBLIC ${NVRTC_WRAPPER_TARGET})
target_link_libraries(
${SHARED_TARGET}
PUBLIC tensorrt_llm_nvrtc_wrapper::tensorrt_llm_nvrtc_wrapper)
if(BUILD_PYT)
add_subdirectory(thop)

View File

@@ -20,9 +20,6 @@ file(GLOB_RECURSE SRC_CPP *.cpp)
set(SRC_CU)
set(SRC_CU_EXTRA)
# Exclude files in nvrtcWrapper folder.
list(FILTER SRC_CPP EXCLUDE REGEX ".*nvrtcWrapper/src.*")
filter_cuda_archs("80" SRC_CPP)
filter_cuda_archs("86" SRC_CPP)
filter_cuda_archs("89" SRC_CPP)

View File

@@ -16,12 +16,12 @@
#include "compileEngine.h"
#include "cubinObj.h"
#include "nvrtcWrapper/include/nvrtcWrapper.h"
#include "tensorrt_llm/common/assert.h"
#include "tensorrt_llm/common/stringUtils.h"
#include "tensorrt_llm/common/tllmException.h"
#include "tensorrt_llm/common/utils.h"
#include "tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/kernelUtils.h"
#include <nvrtcWrapper.h>
#include <string>
#include <vector>

View File

@@ -1,2 +0,0 @@
5ad6be58302fad71488246c4dea6f96d710143988a195d67b304ea251bd0aa89 libtensorrt_llm_nvrtc_wrapper.so
commit 9c24486cb2cd9dd9582b311b84e1b428d29a735a

View File

@@ -1,104 +0,0 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is NOT thread safe.
*/
// Public C ABI of the nvrtc wrapper shared library: JIT-compiles XQA
// (decoder masked multi-head attention) kernels and returns the CUBIN.
#pragma once
#include <stddef.h>
// Export/import macro: when building the DLL on Windows (COMPILING_DLL
// defined) symbols are exported, consumers import them; no-op elsewhere.
#ifdef _WIN32
#if COMPILING_DLL
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT __declspec(dllimport)
#endif
#else // _WIN32
#define DLLEXPORT // Nothing.
#endif
// C linkage so the library is usable from both C and C++ translation units.
#if __cplusplus
extern "C"
{
#endif
// Selects which tensor-core kernel family to JIT-compile.
typedef enum
{
// sm >= 80
TLLM_XQA_JIT_HMMA = 0,
// sm == 90
TLLM_XQA_JIT_QGMMA = 1
} tllmXqaJitKernelType;
// Rotary position embedding variant applied inside the kernel.
typedef enum
{
TLLM_XQA_JIT_ROPE_NONE = 0,
TLLM_XQA_JIT_ROPE_NEOX = 1,
TLLM_XQA_JIT_ROPE_GPTJ = 2
} tllmXqaJitRopeStyle;
// Compile-time configuration of the kernel to generate. Passed (by pointer)
// to tllmXqaJitCreateAndCompileProgram; must outlive the program handle.
typedef struct
{
// Compute capability, e.g. 89.
int sm;
unsigned int head_size;
unsigned int num_q_heads;
unsigned int num_kv_heads;
unsigned int beam_width;
unsigned int tokens_per_block;
bool multi_query_tokens;
// NOTE(review): presumably only meaningful when multi_query_tokens is
// true — confirm against the implementation.
unsigned int q_seq_len;
bool paged_kv_cache;
// Actual type: tensorrt_llm::kernels::Data_type
int data_type;
int kv_cache_data_type;
tllmXqaJitKernelType kernel_type;
bool fp8_output;
bool use_input_kv;
tllmXqaJitRopeStyle rope_style; // useful only when use_input_kv is true.
} tllmXqaJitContext;
// tllmXqaJitProgram is an opaque handle for a program.
typedef struct _tllmXqaJitProgram* tllmXqaJitProgram;
// Status codes returned by all tllmXqaJit* functions below.
typedef enum
{
TLLM_XQA_JIT_SUCCESS = 0,
TLLM_XQA_JIT_INVALID_INPUT = 1,
TLLM_XQA_JIT_INTERNAL_ERROR = 2,
} tllmXqaJitStatus;
// Creates a program handle and compiles it for the given context.
// context must outlive prog.
DLLEXPORT tllmXqaJitStatus tllmXqaJitCreateAndCompileProgram(
tllmXqaJitProgram* prog, tllmXqaJitContext const* context);
// Queries the size in bytes of the compiled CUBIN for a compiled program.
DLLEXPORT tllmXqaJitStatus tllmXqaJitGetCUBINSize(tllmXqaJitProgram prog, size_t* cubinSizeRet);
// Copies the compiled CUBIN into cubin (caller-allocated, see size above).
DLLEXPORT tllmXqaJitStatus tllmXqaJitGetCUBIN(tllmXqaJitProgram prog, char* cubin);
// Destroys the program and releases its resources.
DLLEXPORT tllmXqaJitStatus tllmXqaJitDestroyProgram(tllmXqaJitProgram* prog);
// Returns the size of the error string associated with the last non-success tllmXqaJit function call (including the
// trailing \0). Returns 0 if there is no such non-success function call.
DLLEXPORT size_t tllmXqaJitGetLastErrorStringSize();
// Returns the error string.
// Output can be nullptr if the returned value of tllmGetLastErrorStringSize() is 0.
DLLEXPORT void tllmXqaJitGetLastErrorString(char* output);
#if __cplusplus
} // extern "C"
#endif

View File

@@ -1,2 +0,0 @@
9d1104bbe6b4f258482549ec71c9d1aed0de912b5824dced5cf7829bff66ba0d libtensorrt_llm_nvrtc_wrapper.so
commit 9c24486cb2cd9dd9582b311b84e1b428d29a735a

View File

@@ -18,4 +18,6 @@ RUN apt-get update && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
ENV PATH="/home/${USER_NAME}/.local/bin:${PATH}"
USER ${USER_NAME}

View File

@@ -107,13 +107,25 @@ DOCKER_RUN_OPTS ?= --rm -it --ipc=host --ulimit memlock=-1 --ulimit stack=6710
DOCKER_RUN_ARGS ?=
GPU_OPTS ?= --gpus=all
SOURCE_DIR ?= $(shell readlink -f ..)
NVRTC_WRAPPER_DIR ?=
CODE_DIR ?= /code/tensorrt_llm
CCACHE_DIR ?= ${CODE_DIR}/cpp/.ccache
CONAN_DIR ?= ${CODE_DIR}/cpp/.conan
RUN_CMD ?=
CONTAINER_NAME ?= tensorrt_llm
WORK_DIR ?= $(CODE_DIR)
DOCKER_PULL ?= 0
ifneq ($(NVRTC_WRAPPER_DIR), )
NVRTC_WRAPPER_MOUNT := --volume $(NVRTC_WRAPPER_DIR):/mnt/src/tensorrt_llm_nvrtc_wrapper
else
NVRTC_WRAPPER_MOUNT :=
endif
ifeq ($(LOCAL_USER),1)
$(call add_local_user,$(IMAGE_WITH_TAG))
endif
%_run:
ifeq ($(DOCKER_PULL),1)
@$(MAKE) --no-print-directory $*_pull
@@ -124,8 +136,10 @@ endif
docker run $(DOCKER_RUN_OPTS) $(DOCKER_RUN_ARGS) \
$(GPU_OPTS) \
--volume $(SOURCE_DIR):$(CODE_DIR) \
$(NVRTC_WRAPPER_MOUNT) \
--env "CCACHE_DIR=${CCACHE_DIR}" \
--env "CCACHE_BASEDIR=${CODE_DIR}" \
--env "CONAN_HOME=${CONAN_DIR}" \
--workdir $(WORK_DIR) \
--hostname $(shell hostname)-$* \
--name $(CONTAINER_NAME)-$*-$(USER_NAME) \

View File

@@ -16,6 +16,7 @@
import os
import platform
import re
import sys
from argparse import ArgumentParser
from contextlib import contextmanager
@@ -23,7 +24,8 @@ from functools import partial
from multiprocessing import cpu_count
from pathlib import Path
from shutil import copy, copytree, rmtree
from subprocess import CalledProcessError, check_output, run
from subprocess import DEVNULL, CalledProcessError, check_output, run
from tempfile import TemporaryDirectory
from textwrap import dedent
from typing import List
@@ -76,6 +78,7 @@ def main(*,
extra_make_targets: str = "",
trt_root: str = '/usr/local/tensorrt',
nccl_root: str = None,
nvrtc_wrapper_root: str = None,
clean: bool = False,
clean_wheel: bool = False,
configure_cmake: bool = False,
@@ -181,7 +184,7 @@ def main(*,
cmake_def_args.append(f"-DNCCL_INCLUDE_DIR={nccl_root}/include")
build_dir = get_build_dir(build_dir, build_type)
first_build = not build_dir.exists()
first_build = not Path(build_dir, "CMakeFiles").exists()
if clean and build_dir.exists():
clear_folder(build_dir) # Keep the folder in case it is mounted.
@ -220,9 +223,77 @@ def main(*,
targets.append("executorWorker")
source_dir = get_source_dir()
def install_conan():
    """Ensure a Conan 2.14.0 client is available and configured.

    Installs Conan via pip on most distros; on Rocky Linux a standalone
    Conan tarball is downloaded into the build tree instead. Also registers
    the internal tensorrt-llm remote and detects a default profile.
    Returns the path (str or Path) of the conan executable to invoke.
    """
    # Read the distro ID from /etc/os-release (e.g. "ubuntu", "rocky").
    distro_id = "unknown"
    with Path("/etc/os-release").open("r") as os_release:
        for entry in os_release:
            if entry.startswith("ID="):
                distro_id = entry.split("=")[1].strip()
                break

    # Install Conan if it's not already installed
    # TODO move this install to the container image
    conan_exe = "conan"
    if "rocky" in distro_id:
        # pip-based install is avoided on Rocky; use the standalone binary.
        tool_dir = Path(build_dir, "tool/conan")
        tool_dir.mkdir(parents=True, exist_ok=True)
        conan_exe = tool_dir / "bin/conan"
        if not conan_exe.exists():
            with TemporaryDirectory() as scratch:
                tarball = Path(scratch) / "conan.tgz"
                build_run(
                    f"wget --retry-connrefused -O {tarball} https://github.com/conan-io/conan/releases/download/2.14.0/conan-2.14.0-linux-x86_64.tgz"
                )
                build_run(f"tar -C {tool_dir} -xf {tarball}")
    else:
        build_run(f"\"{sys.executable}\" -m pip install conan==2.14.0")

    # Register the internal remote and create a default profile.
    build_run(
        f"{conan_exe} remote add -verror --force tensorrt-llm https://edge.urm.nvidia.com/artifactory/api/conan/sw-tensorrt-llm-conan"
    )
    build_run(f"{conan_exe} profile detect -f")
    return conan_exe
conan_path = install_conan()
# Build the NVRTC wrapper if the source directory exists
if nvrtc_wrapper_root is not None and Path(nvrtc_wrapper_root).exists():
print(f"Building the NVRTC wrapper from source in {nvrtc_wrapper_root}")
conan_data = Path(source_dir, "conandata.yml").read_text()
nvrtc_wrapper_version = re.search(
r'tensorrt_llm_nvrtc_wrapper:\s*(\S+)', conan_data).group(1)
build_run(
f"{conan_path} editable add {nvrtc_wrapper_root}/conan/nvrtc_wrapper --version {nvrtc_wrapper_version}"
)
nvrtc_wrapper_args = ""
if clean:
nvrtc_wrapper_args += " -c"
if configure_cmake:
nvrtc_wrapper_args += " --configure_cmake"
if use_ccache:
nvrtc_wrapper_args += " --use_ccache"
build_run(
f'"{sys.executable}" {nvrtc_wrapper_root}/scripts/build_wheel.py {nvrtc_wrapper_args} -a "{cuda_architectures}" -D "USE_CXX11_ABI=1;BUILD_NVRTC_WRAPPER=1" -l'
)
else:
# If the NVRTC wrapper source directory is not present, remove the editable NVRTC wrapper from the conan cache
build_run(
f"{conan_path} editable remove -r 'tensorrt_llm_nvrtc_wrapper/*'",
stdout=DEVNULL,
stderr=DEVNULL)
with working_directory(build_dir):
cmake_def_args = " ".join(cmake_def_args)
if clean or first_build or configure_cmake:
build_run(
f"{conan_path} install --remote=tensorrt-llm --output-folder={build_dir}/conan -s 'build_type={build_type}' {source_dir}"
)
cmake_def_args.append(
f"-DCMAKE_TOOLCHAIN_FILE={build_dir}/conan/conan_toolchain.cmake"
)
cmake_def_args = " ".join(cmake_def_args)
cmake_configure_command = (
f'cmake -DCMAKE_BUILD_TYPE="{build_type}" -DBUILD_PYT="{build_pyt}" -DBUILD_PYBIND="{build_pybind}"'
f' -DNVTX_DISABLE="{disable_nvtx}" -DBUILD_MICRO_BENCHMARKS={build_micro_benchmarks}'
@@ -509,6 +580,12 @@ def add_arguments(parser: ArgumentParser):
help="Directory to find TensorRT headers/libs")
parser.add_argument("--nccl_root",
help="Directory to find NCCL headers/libs")
parser.add_argument(
"--nvrtc_wrapper_root",
default="/mnt/src/tensorrt_llm_nvrtc_wrapper",
help=
"Directory to find internal NVRTC wrapper source code. If the directory exists, the NVRTC wrapper will be built from source."
)
parser.add_argument("--build_dir",
type=Path,
help="Directory where cpp sources are built")