infra: move nvrtc_wrapper to conan (#3282)

* add pip scripts dir to path
* move nvrtc_wrapper to conan
* support building nvrtc wrapper from source

---------

Signed-off-by: Tyler Burt <195370667+tburt-nv@users.noreply.github.com>
This commit is contained in:
tburt-nv 2025-04-15 05:31:01 +08:00 committed by GitHub
parent 8cf2785bc6
commit c0dd6cbce0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 136 additions and 165 deletions

1
.gitignore vendored
View File

@@ -32,6 +32,7 @@ config.json
/*.svg
cpp/cmake-build-*
cpp/.ccache
cpp/.conan
tensorrt_llm/bin
tensorrt_llm/include
tensorrt_llm/libs

View File

@@ -358,6 +358,8 @@ if(ENABLE_MULTI_DEVICE)
find_library(NCCL_LIB nccl HINTS ${NCCL_LIB_DIR})
endif()
find_package(tensorrt_llm_nvrtc_wrapper REQUIRED)
get_filename_component(TRT_LLM_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} PATH)
set(3RDPARTY_DIR ${TRT_LLM_ROOT_DIR}/3rdparty)
@@ -369,6 +371,7 @@ include_directories(
${CUDAToolkit_INCLUDE_DIRS}
${CUDNN_ROOT_DIR}/include
${NCCL_INCLUDE_DIR}
${tensorrt_llm_nvrtc_wrapper_INCLUDE_DIRS}
${3RDPARTY_DIR}/cutlass/include
${3RDPARTY_DIR}/cutlass/tools/util/include
${3RDPARTY_DIR}/NVTX/include

1
cpp/conandata.yml Normal file
View File

@@ -0,0 +1 @@
tensorrt_llm_nvrtc_wrapper: 1.9c24486cb2cd9dd9582b311b84e1b428d29a735a

24
cpp/conanfile.py Normal file
View File

@@ -0,0 +1,24 @@
from conan import ConanFile
from conan.tools.cmake import CMakeDeps, CMakeToolchain
class TensorRT_LLM(ConanFile):
    """Conan recipe declaring TensorRT-LLM's binary C++ dependencies.

    Pulls in the prebuilt nvrtc wrapper package and exposes the location of
    its shared library to the CMake build through the generated toolchain.
    """

    name = "TensorRT-LLM"
    settings = "os", "arch", "compiler", "build_type"
    # Legacy virtual-env generators are not needed for this build.
    virtualbuildenv = False
    virtualrunenv = False

    def requirements(self):
        # The pinned wrapper version/revision is read from conandata.yml.
        wrapper_version = self.conan_data["tensorrt_llm_nvrtc_wrapper"]
        self.requires(f"tensorrt_llm_nvrtc_wrapper/{wrapper_version}")

    def generate(self):
        # Emit CMake config-mode find_package() files for all requirements.
        CMakeDeps(self).generate()

        # Record the wrapper .so location in the toolchain so the CMake
        # build can copy it into the build tree.
        toolchain = CMakeToolchain(self)
        wrapper_libdir = self.dependencies[
            "tensorrt_llm_nvrtc_wrapper"].cpp_info.libdirs[0]
        toolchain.variables[
            "NVRTC_WRAPPER_LIB_SOURCE_REL_LOC"] = wrapper_libdir + "/libtensorrt_llm_nvrtc_wrapper.so"
        toolchain.generate()

View File

@@ -139,55 +139,13 @@ find_package(Threads REQUIRED)
target_link_libraries(${BATCH_MANAGER_TARGET} INTERFACE Threads::Threads)
target_link_libraries(${EXECUTOR_TARGET} INTERFACE Threads::Threads)
set(NVRTC_WRAPPER_TARGET tensorrt_llm_nvrtc_wrapper)
set(NVRTC_WRAPPER_TARGET_ARCH ${TARGET_ARCH})
if(BUILD_NVRTC_WRAPPER)
add_subdirectory(
kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper)
else()
add_library(${NVRTC_WRAPPER_TARGET} SHARED IMPORTED)
set(NVRTC_WRAPPER_LIB_TARBALL
"${CMAKE_CURRENT_SOURCE_DIR}/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/${NVRTC_WRAPPER_TARGET_ARCH}/${NVRTC_WRAPPER_TARGET}.tar.xz"
)
set(NVRTC_WRAPPER_LIB_BINARY_DIR
"${CMAKE_CURRENT_BINARY_DIR}/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper"
)
if(NOT WIN32) # Linux
set(NVRTC_WRAPPER_LIB_NAME "lib${NVRTC_WRAPPER_TARGET}.so")
else() # Windows
set(NVRTC_WRAPPER_LIB_NAME "${NVRTC_WRAPPER_TARGET}.lib")
set(NVRTC_WRAPPER_DLL_NAME "${NVRTC_WRAPPER_TARGET}.dll")
set(NVRTC_WRAPPER_DLL_PATH
"${NVRTC_WRAPPER_LIB_BINARY_DIR}/${NVRTC_WRAPPER_DLL_NAME}")
endif()
set(NVRTC_WRAPPER_LIB_PATH
"${NVRTC_WRAPPER_LIB_BINARY_DIR}/${NVRTC_WRAPPER_LIB_NAME}")
add_custom_command(
OUTPUT ${NVRTC_WRAPPER_LIB_PATH} ${NVRTC_WRAPPER_DLL_PATH}
COMMAND ${CMAKE_COMMAND} -E make_directory ${NVRTC_WRAPPER_LIB_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E chdir ${NVRTC_WRAPPER_LIB_BINARY_DIR}
${CMAKE_COMMAND} -E tar xf ${NVRTC_WRAPPER_LIB_TARBALL}
DEPENDS ${NVRTC_WRAPPER_LIB_TARBALL}
VERBATIM)
add_custom_target(${NVRTC_WRAPPER_TARGET}_helper
DEPENDS ${NVRTC_WRAPPER_LIB_PATH} ${NVRTC_WRAPPER_DLL_PATH})
add_dependencies(${NVRTC_WRAPPER_TARGET} ${NVRTC_WRAPPER_TARGET}_helper)
set_property(TARGET ${NVRTC_WRAPPER_TARGET}
PROPERTY IMPORTED_LOCATION ${NVRTC_WRAPPER_LIB_PATH})
if(WIN32)
set_property(TARGET ${NVRTC_WRAPPER_TARGET}
PROPERTY IMPORTED_IMPLIB ${NVRTC_WRAPPER_DLL_PATH})
endif()
file(SIZE ${INTERNAL_CUTLASS_KERNELS_LIB_TARBALL} NVRTC_WRAPPER_LIB_SIZE)
if(NVRTC_WRAPPER_LIB_SIZE LESS 1024)
message(
FATAL_ERROR
"The nvrtc wrapper library is truncated or incomplete. This is usually caused by using Git LFS (Large File Storage) incorrectly. Please try running command `git lfs install && git lfs pull`."
)
endif()
endif()
# NVRTC_WRAPPER_LIB_SOURCE_REL_LOC is defined in cpp/conanfile.py
set(NVRTC_WRAPPER_LIB_BINARY_REL_LOC
"kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/libtensorrt_llm_nvrtc_wrapper.so"
)
# Copy the .so to build directory, which is needed in build_wheel.py.
configure_file(${NVRTC_WRAPPER_LIB_SOURCE_REL_LOC}
${NVRTC_WRAPPER_LIB_BINARY_REL_LOC} COPYONLY)
set(TRTLLM_LINK_LIBS
${CUDA_DRV_LIB}
@@ -273,7 +231,9 @@ if(NOT WIN32)
"-Wl,-rpath='$ORIGIN'")
endif()
target_link_libraries(${SHARED_TARGET} PUBLIC ${NVRTC_WRAPPER_TARGET})
target_link_libraries(
${SHARED_TARGET}
PUBLIC tensorrt_llm_nvrtc_wrapper::tensorrt_llm_nvrtc_wrapper)
if(BUILD_PYT)
add_subdirectory(thop)

View File

@@ -20,9 +20,6 @@ file(GLOB_RECURSE SRC_CPP *.cpp)
set(SRC_CU)
set(SRC_CU_EXTRA)
# Exclude files in nvrtcWrapper folder.
list(FILTER SRC_CPP EXCLUDE REGEX ".*nvrtcWrapper/src.*")
filter_cuda_archs("80" SRC_CPP)
filter_cuda_archs("86" SRC_CPP)
filter_cuda_archs("89" SRC_CPP)

View File

@@ -16,12 +16,12 @@
#include "compileEngine.h"
#include "cubinObj.h"
#include "nvrtcWrapper/include/nvrtcWrapper.h"
#include "tensorrt_llm/common/assert.h"
#include "tensorrt_llm/common/stringUtils.h"
#include "tensorrt_llm/common/tllmException.h"
#include "tensorrt_llm/common/utils.h"
#include "tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/kernelUtils.h"
#include <nvrtcWrapper.h>
#include <string>
#include <vector>

View File

@@ -1,2 +0,0 @@
5ad6be58302fad71488246c4dea6f96d710143988a195d67b304ea251bd0aa89 libtensorrt_llm_nvrtc_wrapper.so
commit 9c24486cb2cd9dd9582b311b84e1b428d29a735a

View File

@@ -1,104 +0,0 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is NOT thread safe.
*/
// Public C ABI of the nvrtc wrapper shared library: JIT-compiles XQA
// (decoder masked multi-head attention) kernels and returns the CUBIN.
#pragma once
#include <stddef.h>
// Export/import macro: when building the DLL on Windows (COMPILING_DLL
// defined) symbols are exported, consumers import them; no-op elsewhere.
#ifdef _WIN32
#if COMPILING_DLL
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT __declspec(dllimport)
#endif
#else // _WIN32
#define DLLEXPORT // Nothing.
#endif
// C linkage so the library is usable from both C and C++ translation units.
#if __cplusplus
extern "C"
{
#endif
// Selects which tensor-core kernel family to JIT-compile.
typedef enum
{
// sm >= 80
TLLM_XQA_JIT_HMMA = 0,
// sm == 90
TLLM_XQA_JIT_QGMMA = 1
} tllmXqaJitKernelType;
// Rotary position embedding variant applied inside the kernel.
typedef enum
{
TLLM_XQA_JIT_ROPE_NONE = 0,
TLLM_XQA_JIT_ROPE_NEOX = 1,
TLLM_XQA_JIT_ROPE_GPTJ = 2
} tllmXqaJitRopeStyle;
// Compile-time configuration of the kernel to generate. Passed (by pointer)
// to tllmXqaJitCreateAndCompileProgram; must outlive the program handle.
typedef struct
{
// Compute capability, e.g. 89.
int sm;
unsigned int head_size;
unsigned int num_q_heads;
unsigned int num_kv_heads;
unsigned int beam_width;
unsigned int tokens_per_block;
bool multi_query_tokens;
// NOTE(review): presumably only meaningful when multi_query_tokens is
// true — confirm against the implementation.
unsigned int q_seq_len;
bool paged_kv_cache;
// Actual type: tensorrt_llm::kernels::Data_type
int data_type;
int kv_cache_data_type;
tllmXqaJitKernelType kernel_type;
bool fp8_output;
bool use_input_kv;
tllmXqaJitRopeStyle rope_style; // useful only when use_input_kv is true.
} tllmXqaJitContext;
// tllmXqaJitProgram is an opaque handle for a program.
typedef struct _tllmXqaJitProgram* tllmXqaJitProgram;
// Status codes returned by all tllmXqaJit* functions below.
typedef enum
{
TLLM_XQA_JIT_SUCCESS = 0,
TLLM_XQA_JIT_INVALID_INPUT = 1,
TLLM_XQA_JIT_INTERNAL_ERROR = 2,
} tllmXqaJitStatus;
// Creates a program handle and compiles it for the given context.
// context must outlive prog.
DLLEXPORT tllmXqaJitStatus tllmXqaJitCreateAndCompileProgram(
tllmXqaJitProgram* prog, tllmXqaJitContext const* context);
// Queries the size in bytes of the compiled CUBIN for a compiled program.
DLLEXPORT tllmXqaJitStatus tllmXqaJitGetCUBINSize(tllmXqaJitProgram prog, size_t* cubinSizeRet);
// Copies the compiled CUBIN into cubin (caller-allocated, see size above).
DLLEXPORT tllmXqaJitStatus tllmXqaJitGetCUBIN(tllmXqaJitProgram prog, char* cubin);
// Destroys the program and releases its resources.
DLLEXPORT tllmXqaJitStatus tllmXqaJitDestroyProgram(tllmXqaJitProgram* prog);
// Returns the size of the error string associated with the last non-success tllmXqaJit function call (including the
// trailing \0). Returns 0 if there is no such non-success function call.
DLLEXPORT size_t tllmXqaJitGetLastErrorStringSize();
// Returns the error string.
// Output can be nullptr if the returned value of tllmGetLastErrorStringSize() is 0.
DLLEXPORT void tllmXqaJitGetLastErrorString(char* output);
#if __cplusplus
} // extern "C"
#endif

View File

@@ -1,2 +0,0 @@
9d1104bbe6b4f258482549ec71c9d1aed0de912b5824dced5cf7829bff66ba0d libtensorrt_llm_nvrtc_wrapper.so
commit 9c24486cb2cd9dd9582b311b84e1b428d29a735a

View File

@@ -18,4 +18,6 @@ RUN apt-get update && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
ENV PATH="/home/${USER_NAME}/.local/bin:${PATH}"
USER ${USER_NAME}

View File

@@ -107,13 +107,25 @@ DOCKER_RUN_OPTS ?= --rm -it --ipc=host --ulimit memlock=-1 --ulimit stack=6710
DOCKER_RUN_ARGS ?=
GPU_OPTS ?= --gpus=all
SOURCE_DIR ?= $(shell readlink -f ..)
NVRTC_WRAPPER_DIR ?=
CODE_DIR ?= /code/tensorrt_llm
CCACHE_DIR ?= ${CODE_DIR}/cpp/.ccache
CONAN_DIR ?= ${CODE_DIR}/cpp/.conan
RUN_CMD ?=
CONTAINER_NAME ?= tensorrt_llm
WORK_DIR ?= $(CODE_DIR)
DOCKER_PULL ?= 0
ifneq ($(NVRTC_WRAPPER_DIR), )
NVRTC_WRAPPER_MOUNT := --volume $(NVRTC_WRAPPER_DIR):/mnt/src/tensorrt_llm_nvrtc_wrapper
else
NVRTC_WRAPPER_MOUNT :=
endif
ifeq ($(LOCAL_USER),1)
$(call add_local_user,$(IMAGE_WITH_TAG))
endif
%_run:
ifeq ($(DOCKER_PULL),1)
@$(MAKE) --no-print-directory $*_pull
@@ -124,8 +136,10 @@ endif
docker run $(DOCKER_RUN_OPTS) $(DOCKER_RUN_ARGS) \
$(GPU_OPTS) \
--volume $(SOURCE_DIR):$(CODE_DIR) \
$(NVRTC_WRAPPER_MOUNT) \
--env "CCACHE_DIR=${CCACHE_DIR}" \
--env "CCACHE_BASEDIR=${CODE_DIR}" \
--env "CONAN_HOME=${CONAN_DIR}" \
--workdir $(WORK_DIR) \
--hostname $(shell hostname)-$* \
--name $(CONTAINER_NAME)-$*-$(USER_NAME) \

View File

@@ -16,6 +16,7 @@
import os
import platform
import re
import sys
from argparse import ArgumentParser
from contextlib import contextmanager
@@ -23,7 +24,8 @@ from functools import partial
from multiprocessing import cpu_count
from pathlib import Path
from shutil import copy, copytree, rmtree
from subprocess import CalledProcessError, check_output, run
from subprocess import DEVNULL, CalledProcessError, check_output, run
from tempfile import TemporaryDirectory
from textwrap import dedent
from typing import List
@@ -76,6 +78,7 @@ def main(*,
extra_make_targets: str = "",
trt_root: str = '/usr/local/tensorrt',
nccl_root: str = None,
nvrtc_wrapper_root: str = None,
clean: bool = False,
clean_wheel: bool = False,
configure_cmake: bool = False,
@@ -181,7 +184,7 @@ def main(*,
cmake_def_args.append(f"-DNCCL_INCLUDE_DIR={nccl_root}/include")
build_dir = get_build_dir(build_dir, build_type)
first_build = not build_dir.exists()
first_build = not Path(build_dir, "CMakeFiles").exists()
if clean and build_dir.exists():
clear_folder(build_dir) # Keep the folder in case it is mounted.
@ -220,9 +223,77 @@ def main(*,
targets.append("executorWorker")
source_dir = get_source_dir()
def install_conan():
    """Ensure a Conan 2.14.0 client is available and configured.

    Installs Conan via pip on most distros; on Rocky Linux a standalone
    Conan tarball is downloaded into the build tree instead. Also registers
    the internal tensorrt-llm remote and detects a default profile.
    Returns the path (str or Path) of the conan executable to invoke.
    """
    # Read the distro ID from /etc/os-release (e.g. "ubuntu", "rocky").
    distro_id = "unknown"
    with Path("/etc/os-release").open("r") as os_release:
        for entry in os_release:
            if entry.startswith("ID="):
                distro_id = entry.split("=")[1].strip()
                break

    # Install Conan if it's not already installed
    # TODO move this install to the container image
    conan_exe = "conan"
    if "rocky" in distro_id:
        # pip-based install is avoided on Rocky; use the standalone binary.
        tool_dir = Path(build_dir, "tool/conan")
        tool_dir.mkdir(parents=True, exist_ok=True)
        conan_exe = tool_dir / "bin/conan"
        if not conan_exe.exists():
            with TemporaryDirectory() as scratch:
                tarball = Path(scratch) / "conan.tgz"
                build_run(
                    f"wget --retry-connrefused -O {tarball} https://github.com/conan-io/conan/releases/download/2.14.0/conan-2.14.0-linux-x86_64.tgz"
                )
                build_run(f"tar -C {tool_dir} -xf {tarball}")
    else:
        build_run(f"\"{sys.executable}\" -m pip install conan==2.14.0")

    # Register the internal remote and create a default profile.
    build_run(
        f"{conan_exe} remote add -verror --force tensorrt-llm https://edge.urm.nvidia.com/artifactory/api/conan/sw-tensorrt-llm-conan"
    )
    build_run(f"{conan_exe} profile detect -f")
    return conan_exe
conan_path = install_conan()
# Build the NVRTC wrapper if the source directory exists
if nvrtc_wrapper_root is not None and Path(nvrtc_wrapper_root).exists():
print(f"Building the NVRTC wrapper from source in {nvrtc_wrapper_root}")
conan_data = Path(source_dir, "conandata.yml").read_text()
nvrtc_wrapper_version = re.search(
r'tensorrt_llm_nvrtc_wrapper:\s*(\S+)', conan_data).group(1)
build_run(
f"{conan_path} editable add {nvrtc_wrapper_root}/conan/nvrtc_wrapper --version {nvrtc_wrapper_version}"
)
nvrtc_wrapper_args = ""
if clean:
nvrtc_wrapper_args += " -c"
if configure_cmake:
nvrtc_wrapper_args += " --configure_cmake"
if use_ccache:
nvrtc_wrapper_args += " --use_ccache"
build_run(
f'"{sys.executable}" {nvrtc_wrapper_root}/scripts/build_wheel.py {nvrtc_wrapper_args} -a "{cuda_architectures}" -D "USE_CXX11_ABI=1;BUILD_NVRTC_WRAPPER=1" -l'
)
else:
# If the NVRTC wrapper source directory is not present, remove the editable NVRTC wrapper from the conan cache
build_run(
f"{conan_path} editable remove -r 'tensorrt_llm_nvrtc_wrapper/*'",
stdout=DEVNULL,
stderr=DEVNULL)
with working_directory(build_dir):
cmake_def_args = " ".join(cmake_def_args)
if clean or first_build or configure_cmake:
build_run(
f"{conan_path} install --remote=tensorrt-llm --output-folder={build_dir}/conan -s 'build_type={build_type}' {source_dir}"
)
cmake_def_args.append(
f"-DCMAKE_TOOLCHAIN_FILE={build_dir}/conan/conan_toolchain.cmake"
)
cmake_def_args = " ".join(cmake_def_args)
cmake_configure_command = (
f'cmake -DCMAKE_BUILD_TYPE="{build_type}" -DBUILD_PYT="{build_pyt}" -DBUILD_PYBIND="{build_pybind}"'
f' -DNVTX_DISABLE="{disable_nvtx}" -DBUILD_MICRO_BENCHMARKS={build_micro_benchmarks}'
@@ -509,6 +580,12 @@ def add_arguments(parser: ArgumentParser):
help="Directory to find TensorRT headers/libs")
parser.add_argument("--nccl_root",
help="Directory to find NCCL headers/libs")
parser.add_argument(
"--nvrtc_wrapper_root",
default="/mnt/src/tensorrt_llm_nvrtc_wrapper",
help=
"Directory to find internal NVRTC wrapper source code. If the directory exists, the NVRTC wrapper will be built from source."
)
parser.add_argument("--build_dir",
type=Path,
help="Directory where cpp sources are built")