mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
[TRTLLM-6143] feat: Improve dev container tagging (#5551)
Signed-off-by: ixlmar <206748156+ixlmar@users.noreply.github.com>
This commit is contained in:
parent
31699cbeb1
commit
04fa6c0cfc
10
.devcontainer/devcontainer.env
Normal file
10
.devcontainer/devcontainer.env
Normal file
@ -0,0 +1,10 @@
|
||||
# Environment variables used to configure the Dev Container setup.
|
||||
#
|
||||
# The syntax needs to be compatible with
|
||||
# https://docs.docker.com/compose/how-tos/environment-variables/variable-interpolation/#env-file-syntax
|
||||
#
|
||||
# Edit this file as necessary. For local changes not to be committed back
|
||||
# to the repository, create/edit devcontainer.env.user instead.
|
||||
HF_HOME_DEFAULT="${HOME}/.cache/huggingface"
|
||||
HF_HOME_XDG_DEFAULT="${XDG_CACHE_HOME:-${HF_HOME_DEFAULT}}"
|
||||
LOCAL_HF_HOME="${HF_HOME:-${HF_HOME_XDG_DEFAULT}}"
|
||||
@ -3,24 +3,18 @@
|
||||
{
|
||||
"name": "TRT-LLM Devcontainer",
|
||||
"dockerComposeFile": [
|
||||
"docker-compose.yml"
|
||||
"docker-compose.yml",
|
||||
"docker-compose.override.yml"
|
||||
],
|
||||
"service": "tensorrt_llm-dev",
|
||||
"remoteUser": "ubuntu",
|
||||
"containerEnv": {
|
||||
// "CCACHE_DIR" : "/home/coder/${localWorkspaceFolderBasename}/cpp/.ccache",
|
||||
// "CCACHE_BASEDIR" : "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"HF_TOKEN": "${localEnv:HF_TOKEN}",
|
||||
"HF_HOME": "/huggingface",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history"
|
||||
},
|
||||
"workspaceFolder": "/workspaces/tensorrt_llm",
|
||||
// "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
// "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localEnv:HOME}/.cache/huggingface,target=/huggingface,type=bind", // HF cache
|
||||
"source=/home/scratch.trt_llm_data/,target=/home/scratch.trt_llm_data/,type=bind,consistency=consistent"
|
||||
],
|
||||
"initializeCommand": "cd ${localWorkspaceFolder} && ./.devcontainer/make_env.py",
|
||||
// Note: sourcing .profile is required since we use a local user and the python interpreter is
|
||||
// global (/usr/bin/python). In this case, pip will default to a local user path which is not
|
||||
// by default in the PATH. In interactive devcontainer shells, .profile is sourced by default.
|
||||
@ -43,7 +37,9 @@
|
||||
// "ms-vscode.cmake-tools",
|
||||
// Git & Github
|
||||
// "GitHub.vscode-pull-request-github"
|
||||
"eamodio.gitlens"
|
||||
"eamodio.gitlens",
|
||||
// Docs
|
||||
"ms-vscode.live-server"
|
||||
],
|
||||
"settings": {
|
||||
"C_Cpp.intelliSenseEngine": "disabled",
|
||||
|
||||
8
.devcontainer/docker-compose.override-example.yml
Normal file
8
.devcontainer/docker-compose.override-example.yml
Normal file
@ -0,0 +1,8 @@
|
||||
# Example .devcontainer/docker-compose.override.yml
|
||||
version: "3.9"
|
||||
services:
|
||||
tensorrt_llm-dev:
|
||||
volumes:
|
||||
# Uncomment the following lines to enable
|
||||
# # Mount TRTLLM data volume:
|
||||
# - /home/scratch.trt_llm_data/:/home/scratch.trt_llm_data/:ro
|
||||
@ -1,7 +1,7 @@
|
||||
version: "3.9"
|
||||
services:
|
||||
tensorrt_llm-dev:
|
||||
image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202506271620-5539
|
||||
image: ${DEV_CONTAINER_IMAGE}
|
||||
network_mode: host
|
||||
ipc: host
|
||||
|
||||
@ -22,7 +22,8 @@ services:
|
||||
capabilities: [gpu]
|
||||
|
||||
volumes:
|
||||
- ..:/workspaces/tensorrt_llm:cached
|
||||
- ${SOURCE_DIR}:/workspaces/tensorrt_llm
|
||||
- ${LOCAL_HF_HOME}:/huggingface # HF cache
|
||||
|
||||
environment:
|
||||
- CCACHE_DIR=/workspaces/tensorrt_llm/cpp/.ccache
|
||||
|
||||
219
.devcontainer/make_env.py
Executable file
219
.devcontainer/make_env.py
Executable file
@ -0,0 +1,219 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
# --- Input files, evaluated in this order to build the environment. ---
JENKINS_PROPS_PATH = Path("jenkins/current_image_tags.properties")
DEV_CONTAINER_ENV_PATH = Path(".devcontainer/devcontainer.env")
DEV_CONTAINER_USER_ENV_PATH = Path(".devcontainer/devcontainer.env.user")

# --- Generated outputs. ---
DOT_ENV_PATH = Path(".devcontainer/.env")
COMPOSE_OVERRIDE_PATH = Path(".devcontainer/docker-compose.override.yml")
COMPOSE_OVERRIDE_EXAMPLE_PATH = Path(
    ".devcontainer/docker-compose.override-example.yml")

# --- Names of environment variables consumed/produced below. ---
HOME_DIR_VAR = "HOME_DIR"
SOURCE_DIR_VAR = "SOURCE_DIR"
DEV_CONTAINER_IMAGE_VAR = "DEV_CONTAINER_IMAGE"
BUILD_LOCAL_VAR = "BUILD_LOCAL"
JENKINS_IMAGE_VAR = "LLM_DOCKER_IMAGE"
LOCAL_HF_HOME_VAR = "LOCAL_HF_HOME"

LOGGER = logging.getLogger("make_env")
|
||||
|
||||
|
||||
def _load_env(env_files: List[Path]) -> Dict[str, str]:
|
||||
"""Evaluate files using 'sh' and return resulting environment."""
|
||||
with TemporaryDirectory("trtllm_make_env") as temp_dir:
|
||||
json_path = Path(temp_dir) / 'env.json'
|
||||
subprocess.run(
|
||||
("(echo set -a && cat " +
|
||||
" ".join(shlex.quote(str(env_file)) for env_file in env_files) +
|
||||
" && echo && echo exec /usr/bin/env python3 -c \"'import json; import os; print(json.dumps(dict(os.environ)))'\""
|
||||
+ f") | sh > {json_path}"),
|
||||
shell=True,
|
||||
check=True,
|
||||
)
|
||||
with open(json_path, "r") as f:
|
||||
env = json.load(f)
|
||||
return env
|
||||
|
||||
|
||||
def _detect_rootless() -> bool:
    """Return True when the Docker daemon runs in rootless mode.

    Delegates the heuristic to docker/detect_rootless.sh, which prints
    "1" (rootless) or "0" (regular) on stdout.
    """
    completed = subprocess.run(
        "./docker/detect_rootless.sh",
        shell=True,
        check=True,
        capture_output=True,
    )
    answer = completed.stdout.decode("utf-8").strip()
    return bool(int(answer))
|
||||
|
||||
|
||||
def _handle_rootless(env_inout: Dict[str, str]):
    """Adjust *env_inout* in place depending on Docker Rootless Mode.

    Non-rootless: HOME_DIR and SOURCE_DIR default to $HOME and the current
    working directory. Rootless: both must have been provided (via
    devcontainer.env.user), and LOCAL_HF_HOME is re-rooted from the host
    $HOME onto HOME_DIR (relevant when UID/GID translation, e.g. via
    bindfs, exposes the home directory under a different path).
    """
    if not _detect_rootless():
        env_inout[HOME_DIR_VAR] = env_inout["HOME"]
        env_inout[SOURCE_DIR_VAR] = os.getcwd()
        return

    LOGGER.info("Docker Rootless Mode detected.")

    if HOME_DIR_VAR not in env_inout:
        raise ValueError(
            "Docker Rootless Mode requires setting HOME_DIR in devcontainer.env.user"
        )
    if SOURCE_DIR_VAR not in env_inout:
        raise ValueError(
            "Docker Rootless Mode requires setting SOURCE_DIR in devcontainer.env.user"
        )

    # Handle HF_HOME
    if "HF_HOME" in os.environ and "HF_HOME" in env_inout:
        raise ValueError(
            "Docker Rootless Mode requires either not setting HF_HOME at all or overriding it in devcontainer.env.user"
        )
    host_home = env_inout["HOME"]
    hf_home = env_inout[LOCAL_HF_HOME_VAR]
    if hf_home.startswith(host_home):
        # Re-root the HF cache path onto the user-provided HOME_DIR.
        env_inout[LOCAL_HF_HOME_VAR] = hf_home.replace(
            host_home, env_inout[HOME_DIR_VAR], 1)
|
||||
|
||||
|
||||
def _select_prebuilt_image(env: Dict[str, str]) -> Optional[str]:
    """Return the URI of the first usable pre-built dev container image.

    Candidates, in priority order:
      1. The CI image (LLM_DOCKER_IMAGE from the Jenkins properties file).
      2. NGC devel images for release tags (vX.Y.Z...) reachable from HEAD,
         newest first.

    Availability is probed by running '/bin/true' in each candidate with
    'docker run --pull=missing'. Returns None when no candidate works.
    """
    # Jenkins image
    candidate_images: List[str] = [env[JENKINS_IMAGE_VAR]]

    # NGC images: version tags merged into HEAD, newest first ('tac').
    proc = subprocess.run(
        r"git tag --sort=creatordate --merged=HEAD | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+' | sed -E 's/^v(.*)$/\1/' | tac",
        shell=True,
        capture_output=True,
        check=True,
        # BUG FIX: without text=True, proc.stdout is bytes, so each tag
        # rendered into the f-string below as "b'1.2.3'", producing image
        # URIs like "devel:b'1.2.3'" that can never resolve.
        text=True,
    )
    for git_tag in proc.stdout.splitlines():
        git_tag = git_tag.strip()
        if not git_tag:
            continue
        candidate_images.append(f"nvcr.io/nvidia/tensorrt-llm/devel:{git_tag}")

    # Check image availability
    for candidate_image in candidate_images:
        LOGGER.info(f"Trying image {candidate_image}")

        try:
            subprocess.run(
                f"docker run --rm -it --pull=missing --entrypoint=/bin/true {shlex.quote(candidate_image)}",
                check=True,
                shell=True)
        except subprocess.CalledProcessError:
            continue

        LOGGER.info(f"Using image {candidate_image}")
        return candidate_image

    LOGGER.info("No pre-built image found!")
    return None
|
||||
|
||||
|
||||
def _parse_image_uri_from_build_log(build_log: str) -> Optional[str]:
    """Extract the image URI from the '--tag' argument of the logged
    'docker buildx build' command line, or return None if absent."""
    # Handle escaped and actual line breaks
    build_log_lines = re.sub(r"\\\n", " ", build_log).splitlines()
    for build_log_line in build_log_lines:
        tokens = shlex.split(build_log_line)
        if tokens[:3] != ["docker", "buildx", "build"]:
            continue
        # Scan for the --tag flag, accepting "--tag=URI" and "--tag URI".
        token = None
        while tokens and not (token := tokens.pop(0)).startswith("--tag"):
            pass
        # BUG FIX: previously only 'token is None' was checked, but when the
        # scan exhausts the tokens, 'token' holds the last popped
        # (non-matching) token rather than None.
        if token is None or not token.startswith("--tag"):
            continue
        if token.startswith("--tag="):
            # BUG FIX: previously stripped the prefix "--arg=", which can
            # never match a token that starts with "--tag", so the
            # "--tag=URI" form returned the flag verbatim.
            return token.removeprefix("--tag=")
        if not tokens:
            continue
        return tokens.pop(0)  # the URI follows the bare --tag flag
    return None


def _build_local_image() -> str:
    """Build the devel container image locally and return its image URI.

    Runs 'make -C docker devel_build', tees the output to a log file, and
    parses the image URI from the logged 'docker buildx build --tag ...'
    invocation.

    Raises:
        RuntimeError: if no --tag argument can be parsed from the log.
    """
    LOGGER.info("Building container image locally")

    with TemporaryDirectory("trtllm_make_env") as temp_dir:
        log_path = Path(temp_dir) / "build.log"
        subprocess.run(
            f"make -C docker devel_build | tee {shlex.quote(str(log_path))}",
            check=True,
            shell=True,
        )
        with open(log_path) as f:
            build_log = f.read()

    image_uri = _parse_image_uri_from_build_log(build_log)
    if image_uri is None:
        raise RuntimeError(
            f"Could not parse --tag argument from build log: {build_log}")
    return image_uri
|
||||
|
||||
|
||||
def _ensure_compose_override():
    """Seed docker-compose.override.yml from the example file if missing.

    The override file is user-owned and gitignored; it is only created
    once so local edits are never clobbered.
    """
    if COMPOSE_OVERRIDE_PATH.exists():
        return
    LOGGER.info(
        f"Creating initial {COMPOSE_OVERRIDE_PATH} from {COMPOSE_OVERRIDE_EXAMPLE_PATH}"
    )
    example_contents = COMPOSE_OVERRIDE_EXAMPLE_PATH.read_bytes()
    COMPOSE_OVERRIDE_PATH.write_bytes(example_contents)
|
||||
|
||||
|
||||
def _update_dot_env(env: Dict[str, str]):
    """Write the docker-compose .env file from *env*.

    Only variables whose value differs from this process's own environment
    are written, so the file records just the delta introduced by the
    sourced configuration files and this script.
    """
    LOGGER.info(f"Updating {DOT_ENV_PATH}")

    output_lines = [
        "# NOTE: This file is generated by make_env.py, modify devcontainer.env.user instead of this file.\n",
        "\n",
    ]

    for env_key, env_value in env.items():
        if os.environ.get(env_key) == env_value:
            # Only storing differences w.r.t. base env
            continue
        # BUG FIX: shlex.quote() already adds quoting when required, so the
        # previous f'{k}="{shlex.quote(v)}"' form produced e.g. KEY="'a b'"
        # for values with spaces, leaking the inner single quotes into the
        # value docker-compose reads.
        output_lines.append(f"{env_key}={shlex.quote(env_value)}\n")

    with open(DOT_ENV_PATH, "w") as f:
        f.writelines(output_lines)
|
||||
|
||||
|
||||
def main():
    """Generate .devcontainer/.env and ensure a compose override exists.

    Sources the Jenkins image properties plus the tracked and user-local
    devcontainer env files, applies rootless-mode adjustments, resolves the
    container image to use, and writes the results to the .env file that
    docker-compose interpolates.
    """
    env = _load_env([
        JENKINS_PROPS_PATH,
        DEV_CONTAINER_ENV_PATH,
        DEV_CONTAINER_USER_ENV_PATH,
    ])
    _handle_rootless(env_inout=env)

    # Determine container image to use
    image_uri = env.get(DEV_CONTAINER_IMAGE_VAR)
    if image_uri:
        LOGGER.info(f"Using user-provided container image: {image_uri}")
    else:
        # BUILD_LOCAL: unset -> auto (prefer pre-built, fall back to local
        # build); "0" -> never build locally; "1" -> always build locally.
        build_local = None
        if BUILD_LOCAL_VAR in env:
            build_local = bool(int(env[BUILD_LOCAL_VAR].strip()))
        image_uri = None
        if not build_local:
            image_uri = _select_prebuilt_image(env)
        if image_uri is None:
            if build_local is False:
                raise RuntimeError(
                    "No suitable container image found and local build disabled."
                )
            image_uri = _build_local_image()
            LOGGER.info(f"Using locally built container image: {image_uri}")
        env[DEV_CONTAINER_IMAGE_VAR] = image_uri

    _ensure_compose_override()

    _update_dot_env(env)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    try:
        main()
    except Exception as exc:  # surface a concise one-line error, no traceback
        LOGGER.error(f"{exc.__class__.__name__}: {exc}")
        sys.exit(-1)
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@ -59,9 +59,12 @@ llm-test-workspace/
|
||||
# Generated files
|
||||
cpp/include/tensorrt_llm/executor/version.h
|
||||
cpp/tensorrt_llm/kernels/contextFusedMultiHeadAttention/fmha_v2_cu/
|
||||
.devcontainer/.env
|
||||
|
||||
# User config files
|
||||
CMakeUserPresets.json
|
||||
compile_commands.json
|
||||
*.bin
|
||||
.dir-locals.el
|
||||
.devcontainer/devcontainer.env.user
|
||||
.devcontainer/docker-compose.override.yml
|
||||
|
||||
@ -16,7 +16,7 @@ GROUP_ID ?= $(shell id --group)
|
||||
GROUP_NAME ?= $(shell id --group --name)
|
||||
|
||||
# Try to detect Docker rootless mode
|
||||
IS_ROOTLESS ?= $(shell if [ "$$(docker context inspect --format '{{.Endpoints.docker.Host}}' "$$(docker context show)")" = "unix:///run/user/$(USER_ID)/docker.sock" ]; then echo 1; else echo 0; fi)
|
||||
IS_ROOTLESS ?= $(shell ./detect_rootless.sh)
|
||||
|
||||
# Set this to 1 to add the current user to the docker image and run the container with the user
|
||||
LOCAL_USER ?= 0
|
||||
@ -72,7 +72,7 @@ define rewrite_tag
|
||||
$(shell echo $(IMAGE_WITH_TAG) | sed "s/\/tensorrt-llm:/\/tensorrt-llm-staging:/g")
|
||||
endef
|
||||
|
||||
%_build: DEVEL_IMAGE = $(if $(findstring 1,$(JENKINS_DEVEL)),$(shell grep '^[[:space:]]*LLM_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"'))
|
||||
%_build: DEVEL_IMAGE = $(if $(findstring 1,$(JENKINS_DEVEL)),$(shell . ../jenkins/current_image_tags.properties && echo $$LLM_DOCKER_IMAGE))
|
||||
%_build:
|
||||
@echo "Building docker image: $(IMAGE_WITH_TAG)"
|
||||
docker buildx build $(DOCKER_BUILD_OPTS) $(DOCKER_BUILD_ARGS) \
|
||||
@ -171,15 +171,15 @@ release_%: STAGE = release
|
||||
release_run: WORK_DIR = /app/tensorrt_llm
|
||||
|
||||
# For x86_64
|
||||
jenkins_%: IMAGE_WITH_TAG = $(shell grep '^[[:space:]]*LLM_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"')
|
||||
jenkins_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_DOCKER_IMAGE)
|
||||
jenkins_%: STAGE = tritondevel
|
||||
|
||||
# For aarch64
|
||||
jenkins-aarch64_%: IMAGE_WITH_TAG = $(shell grep '^[[:space:]]*LLM_SBSA_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"')
|
||||
jenkins-aarch64_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_SBSA_DOCKER_IMAGE)
|
||||
jenkins-aarch64_%: STAGE = tritondevel
|
||||
|
||||
# For x86_64
|
||||
jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell grep '^[[:space:]]*LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"')
|
||||
jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE)
|
||||
jenkins-rockylinux8_%: STAGE = tritondevel
|
||||
jenkins-rockylinux8_%: BASE_IMAGE = nvidia/cuda
|
||||
jenkins-rockylinux8_%: BASE_TAG = 12.9.0-devel-rockylinux8
|
||||
@ -197,11 +197,11 @@ trtllm_%: STAGE = release
|
||||
trtllm_%: PUSH_TO_STAGING := 0
|
||||
trtllm_%: DEVEL_IMAGE = $(shell \
|
||||
if [ "$(PLATFORM)" = "amd64" ]; then \
|
||||
grep '^[[:space:]]*LLM_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"'; \
|
||||
. ../jenkins/current_image_tags.properties && echo $$LLM_DOCKER_IMAGE; \
|
||||
elif [ "$(PLATFORM)" = "arm64" ]; then \
|
||||
grep '^[[:space:]]*LLM_SBSA_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"'; \
|
||||
. ../jenkins/current_image_tags.properties && echo $$LLM_SBSA_DOCKER_IMAGE; \
|
||||
fi)
|
||||
trtllm_%: IMAGE_NAME = $(shell grep '^[[:space:]]*IMAGE_NAME = ' ../jenkins/BuildDockerImage.groovy | grep -o '".*"' | tr -d '"')
|
||||
trtllm_%: IMAGE_NAME = $(shell . ../jenkins/current_image_tags.properties && echo $$IMAGE_NAME)
|
||||
trtllm_%: IMAGE_TAG = $(shell git rev-parse --abbrev-ref HEAD | tr '/' '_')-$(PLATFORM)
|
||||
trtllm_run: WORK_DIR = /app/tensorrt_llm
|
||||
|
||||
|
||||
7
docker/detect_rootless.sh
Executable file
7
docker/detect_rootless.sh
Executable file
@ -0,0 +1,7 @@
|
||||
#!/bin/sh

# Print 1 when the active Docker context points at a rootless (per-user)
# daemon socket, 0 otherwise.
context_name="$(docker context show)"
daemon_host="$(docker context inspect --format '{{.Endpoints.docker.Host}}' "$context_name")"
rootless_socket="unix:///run/user/$(id -u)/docker.sock"

if [ "$daemon_host" = "$rootless_socket" ]; then
    echo 1
else
    echo 0
fi
|
||||
@ -133,6 +133,7 @@ Welcome to TensorRT-LLM's Documentation!
|
||||
reference/precision.md
|
||||
reference/memory.md
|
||||
reference/ci-overview.md
|
||||
reference/dev-containers.md
|
||||
|
||||
|
||||
.. toctree::
|
||||
|
||||
100
docs/source/reference/dev-containers.md
Normal file
100
docs/source/reference/dev-containers.md
Normal file
@ -0,0 +1,100 @@
|
||||
# Using Dev Containers
|
||||
|
||||
The TensorRT-LLM repository contains a [Dev Containers](https://containers.dev/)
|
||||
configuration in `.devcontainer`. These files are intended for
|
||||
use with [Visual Studio Code](https://code.visualstudio.com/).
|
||||
|
||||
Due to the various container options supported by TensorRT-LLM (see
|
||||
[](/installation/build-from-source-linux.md) and
|
||||
<https://github.com/NVIDIA/TensorRT-LLM/tree/main/docker>), the Dev
|
||||
Container configuration also offers some degree of customization.
|
||||
|
||||
Generally, the `initializeCommand` in `devcontainer.json` will run
|
||||
`make_env.py` to generate an
|
||||
[`.env` file for `docker-compose`](https://docs.docker.com/compose/how-tos/environment-variables/variable-interpolation/#env-file-syntax).
|
||||
Most importantly, the `docker-compose.yml` uses `${DEV_CONTAINER_IMAGE}`
|
||||
as base image.
|
||||
The generated `.devcontainer/.env` is not tracked by Git and combines
|
||||
data from the following sources:
|
||||
|
||||
* `jenkins/current_image_tags.properties` which contains the image tags
|
||||
currently used by CI.
|
||||
|
||||
* `.devcontainer/devcontainer.env` which contains common configuration
|
||||
settings and is tracked by Git.
|
||||
|
||||
* `.devcontainer/devcontainer.env.user` (optional) which is ignored by
|
||||
Git and can be edited to customize the Dev Container behavior.
|
||||
|
||||
The source files are processed using `sh`, in the order in which they
|
||||
are listed above. Thus, features like command substitution are supported.
|
||||
|
||||
The following sections provide more detail on particular Dev Container
|
||||
configuration parameters which can be customized.
|
||||
|
||||
```{note}
|
||||
After editing any of the configuration files, it may be necessary
|
||||
to execute the "Dev Containers: Reopen Folder in SSH" (if applicable) and
|
||||
"Dev Containers: Rebuild and Reopen in Container" Visual Studio Code
|
||||
commands.
|
||||
```
|
||||
|
||||
## Container image selection
|
||||
|
||||
By default, `make_env.py` will attempt to auto-select a suitable container
|
||||
image as follows:
|
||||
|
||||
1. Reuse the development container image used by CI. This requires access
|
||||
to the NVIDIA internal artifact repository.
|
||||
|
||||
1. Use the most recent
|
||||
[NGC Development container image](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/devel)
|
||||
associated with a Git tag which is reachable from the currently checked
|
||||
out commit.
|
||||
|
||||
1. Build a development image locally.
|
||||
|
||||
Set `DEV_CONTAINER_IMAGE=<some_uri>` to bypass the aforementioned discovery
|
||||
mechanism if desired.
|
||||
|
||||
By setting `BUILD_LOCAL=0`, the local image build can be disabled. In this
|
||||
case, execution fails if no suitable pre-built image is found.
|
||||
|
||||
Setting `BUILD_LOCAL=1` forces building of a local image, even if a pre-built
|
||||
image is available.
|
||||
|
||||
## Volume Mounts
|
||||
|
||||
[Docker volume mounts](https://docs.docker.com/engine/storage/volumes/#use-a-volume-with-docker-compose) can be customized by editing
|
||||
`docker-compose.yml`, which allows using any variables defined in `.env`.
|
||||
|
||||
By default, the Dev Container configuration mounts the VS Code workspace into
|
||||
`/workspaces/tensorrt_llm` and `~/.cache/huggingface` into `/huggingface`.
|
||||
The source paths can be overridden by setting `SOURCE_DIR` and `HOME_DIR`
|
||||
in `.devcontainer/devcontainer.env.user`, respectively. This is of
|
||||
particular relevance when using
|
||||
[Docker Rootless Mode](https://docs.docker.com/engine/security/rootless/),
|
||||
which requires configuring UID/GID translation using a tool like `bindfs`.
|
||||
The Dev Container scripts contain heuristics to detect Docker Rootless
|
||||
Mode and will issue an error if these variables are not set.
|
||||
An analogous logic is applied to `HF_HOME`.
|
||||
|
||||
|
||||
## Overriding Docker Compose configuration
|
||||
|
||||
When starting the container, `.devcontainer/docker-compose.yml`
|
||||
is [merged](https://docs.docker.com/compose/how-tos/multiple-compose-files/merge/) with
|
||||
`.devcontainer/docker-compose.override.yml`. The latter file is not
|
||||
tracked by Git and will be created by `make_env.py` if it does not exist.
|
||||
|
||||
This mechanism can be used, e.g., to add custom volume mounts:
|
||||
|
||||
```{literalinclude} /../../.devcontainer/docker-compose.override-example.yml
|
||||
```
|
||||
|
||||
It is possible to conditionally mount volumes by combining, e.g.,
|
||||
[this method](https://stackoverflow.com/a/61954812) and shell command
|
||||
substitution in `.devcontainer/devcontainer.env.user`.
|
||||
|
||||
If no `.devcontainer/docker-compose.override.yml` file is found, the Dev Container
|
||||
initialization script will create one with the contents listed above.
|
||||
@ -26,12 +26,23 @@ ARTIFACT_PATH = env.artifactPath ? env.artifactPath : "sw-tensorrt-generic/llm-a
|
||||
UPLOAD_PATH = env.uploadPath ? env.uploadPath : "sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${BUILD_NUMBER}"
|
||||
|
||||
// Container configuration
|
||||
// available tags can be found in: https://urm.nvidia.com/artifactory/sw-tensorrt-docker/tensorrt-llm/
|
||||
// [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id]
|
||||
LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202506271620-5539"
|
||||
LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202506271620-5539"
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202506271620-5539"
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202506271620-5539"
|
||||
def getContainerURIs()
|
||||
{
|
||||
// available tags can be found in: https://urm.nvidia.com/artifactory/sw-tensorrt-docker/tensorrt-llm/
|
||||
// [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id]
|
||||
tagProps = readProperties file: "${LLM_ROOT}/jenkins/current_image_tags.properties", interpolate: true
|
||||
uris = [:]
|
||||
keys = [
|
||||
"LLM_DOCKER_IMAGE",
|
||||
"LLM_SBSA_DOCKER_IMAGE",
|
||||
"LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE",
|
||||
"LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE"
|
||||
]
|
||||
for (key in keys) {
|
||||
uris[key] = tagProps[key]
|
||||
}
|
||||
return uris
|
||||
}
|
||||
|
||||
// TODO: Move common variables to an unified location
|
||||
BUILD_CORES_REQUEST = "8"
|
||||
@ -315,14 +326,17 @@ def echoNodeAndGpuInfo(pipeline, stageName)
|
||||
|
||||
def setupPipelineEnvironment(pipeline, testFilter, globalVars)
|
||||
{
|
||||
setupPipelineSpec = createKubernetesPodConfig(LLM_DOCKER_IMAGE, "build")
|
||||
image = "urm.nvidia.com/docker/golang:1.22"
|
||||
setupPipelineSpec = createKubernetesPodConfig(image, "build")
|
||||
trtllm_utils.launchKubernetesPod(pipeline, setupPipelineSpec, "trt-llm", {
|
||||
sh "env | sort"
|
||||
updateGitlabCommitStatus name: "${BUILD_STATUS_NAME}", state: 'running'
|
||||
echo "Using GitLab repo: ${LLM_REPO}."
|
||||
sh "git config --global --add safe.directory \"*\""
|
||||
// NB: getContainerURIs reads files in ${LLM_ROOT}/jenkins/
|
||||
if (env.gitlabMergeRequestLastCommit) {
|
||||
env.gitlabCommit = env.gitlabMergeRequestLastCommit
|
||||
trtllm_utils.checkoutSource(LLM_REPO, env.gitlabCommit, LLM_ROOT, true, true)
|
||||
} else {
|
||||
branch = env.gitlabBranch ? env.gitlabBranch : "main"
|
||||
trtllm_utils.checkoutSource(LLM_REPO, branch, LLM_ROOT, true, true)
|
||||
@ -334,6 +348,9 @@ def setupPipelineEnvironment(pipeline, testFilter, globalVars)
|
||||
testFilter[(MULTI_GPU_FILE_CHANGED)] = getMultiGpuFileChanged(pipeline, testFilter, globalVars)
|
||||
testFilter[(ONLY_PYTORCH_FILE_CHANGED)] = getOnlyPytorchFileChanged(pipeline, testFilter, globalVars)
|
||||
testFilter[(AUTO_TRIGGER_TAG_LIST)] = getAutoTriggerTagList(pipeline, testFilter, globalVars)
|
||||
getContainerURIs().each { k, v ->
|
||||
globalVars[k] = v
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@ -865,9 +882,9 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
|
||||
String globalVarsJson = writeJSON returnText: true, json: globalVars
|
||||
parameters += [
|
||||
'enableFailFast': enableFailFast,
|
||||
'dockerImage': LLM_DOCKER_IMAGE,
|
||||
'wheelDockerImagePy310': LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE,
|
||||
'wheelDockerImagePy312': LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE,
|
||||
'dockerImage': globalVars["LLM_DOCKER_IMAGE"],
|
||||
'wheelDockerImagePy310': globalVars["LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE"],
|
||||
'wheelDockerImagePy312': globalVars["LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE"],
|
||||
'globalVars': globalVarsJson,
|
||||
]
|
||||
|
||||
@ -900,15 +917,14 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
|
||||
}
|
||||
try {
|
||||
parameters = getCommonParameters()
|
||||
|
||||
String testFilterJson = writeJSON returnText: true, json: testFilter
|
||||
String globalVarsJson = writeJSON returnText: true, json: globalVars
|
||||
parameters += [
|
||||
'enableFailFast': enableFailFast,
|
||||
'testFilter': testFilterJson,
|
||||
'dockerImage': LLM_DOCKER_IMAGE,
|
||||
'wheelDockerImagePy310': LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE,
|
||||
'wheelDockerImagePy312': LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE,
|
||||
'dockerImage': globalVars["LLM_DOCKER_IMAGE"],
|
||||
'wheelDockerImagePy310': globalVars["LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE"],
|
||||
'wheelDockerImagePy312': globalVars["LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE"],
|
||||
'globalVars': globalVarsJson,
|
||||
]
|
||||
|
||||
@ -965,7 +981,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
|
||||
String globalVarsJson = writeJSON returnText: true, json: globalVars
|
||||
parameters += [
|
||||
'enableFailFast': enableFailFast,
|
||||
"dockerImage": LLM_SBSA_DOCKER_IMAGE,
|
||||
"dockerImage": globalVars["LLM_SBSA_DOCKER_IMAGE"],
|
||||
'globalVars': globalVarsJson,
|
||||
]
|
||||
|
||||
@ -999,13 +1015,12 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
|
||||
}
|
||||
try {
|
||||
def parameters = getCommonParameters()
|
||||
|
||||
String testFilterJson = writeJSON returnText: true, json: testFilter
|
||||
String globalVarsJson = writeJSON returnText: true, json: globalVars
|
||||
parameters += [
|
||||
'enableFailFast': enableFailFast,
|
||||
'testFilter': testFilterJson,
|
||||
"dockerImage": LLM_SBSA_DOCKER_IMAGE,
|
||||
"dockerImage": globalVars["LLM_SBSA_DOCKER_IMAGE"],
|
||||
'globalVars': globalVarsJson,
|
||||
]
|
||||
|
||||
|
||||
12
jenkins/current_image_tags.properties
Normal file
12
jenkins/current_image_tags.properties
Normal file
@ -0,0 +1,12 @@
|
||||
# NOTE: Although the Java Properties file format is more flexible, cf.
|
||||
# https://docs.oracle.com/cd/E23095_01/Platform.93/ATGProgGuide/html/s0204propertiesfileformat01.html
|
||||
# keep the format compatible with
|
||||
# https://code.visualstudio.com/remote/advancedcontainers/environment-variables#_option-2-use-an-env-file
|
||||
# for reuse in Dev Containers configuration.
|
||||
# Also, the file needs to be parseable by 'sh' for reuse by docker/Makefile.
|
||||
LLM_DOCKER_IMAGE_URI=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm
|
||||
LLM_DOCKER_IMAGE_TAG_SUFFIX=-trt10.11.0.33-skip-tritondevel-202506271620-5539
|
||||
LLM_DOCKER_IMAGE=${LLM_DOCKER_IMAGE_URI}:pytorch-25.05-py3-x86_64-ubuntu24.04${LLM_DOCKER_IMAGE_TAG_SUFFIX}
|
||||
LLM_SBSA_DOCKER_IMAGE=${LLM_DOCKER_IMAGE_URI}:pytorch-25.05-py3-aarch64-ubuntu24.04${LLM_DOCKER_IMAGE_TAG_SUFFIX}
|
||||
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=${LLM_DOCKER_IMAGE_URI}:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310${LLM_DOCKER_IMAGE_TAG_SUFFIX}
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=${LLM_DOCKER_IMAGE_URI}:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312${LLM_DOCKER_IMAGE_TAG_SUFFIX}
|
||||
Loading…
Reference in New Issue
Block a user