TensorRT-LLMs/.devcontainer/docker-compose.yml
dongxuy04 1e369658f1
feat: large-scale EP(part 6: Online EP load balancer integration for GB200 nvfp4) (#4818)
Signed-off-by: Dongxu Yang <78518666+dongxuy04@users.noreply.github.com>
Signed-off-by: ShiXiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
Co-authored-by: ShiXiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
2025-06-08 10:25:18 +08:00

33 lines
885 B
YAML

# Compose definition for the TensorRT-LLM dev container.
# Defines a single long-running service, `tensorrt_llm-dev`, with the
# repository bind-mounted into it.
version: "3.9"
services:
  tensorrt_llm-dev:
    image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202506021004-9420
    # Use the host's network stack and IPC namespace directly.
    network_mode: host
    ipc: host

    # For GPU usage and profiling
    cap_add:
      - SYS_PTRACE
      - SYS_ADMIN
    security_opt:
      - seccomp:unconfined

    # Delete this section if you don't have a GPU.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: "all"
              capabilities: [gpu]

    # Bind-mount the parent directory of this file as the workspace.
    volumes:
      - ..:/workspaces/tensorrt_llm:cached

    # Point ccache at a directory inside the mounted workspace.
    environment:
      - CCACHE_DIR=/workspaces/tensorrt_llm/cpp/.ccache
      - CCACHE_BASEDIR=/workspaces/tensorrt_llm

    # Overrides default command so things don't shut down after the process ends.
    command: /bin/sh -c "while sleep 1000; do :; done"