[https://nvbugs/5450855][fix] Cherry pick #6700 and #6702 from main (#6808)

Signed-off-by: Yiqing Yan <yiqingy@nvidia.com>
Signed-off-by: Yanchao Lu <yanchaol@nvidia.com>
Co-authored-by: Yiqing Yan <yiqingy@nvidia.com>
This commit is contained in:
Yanchao Lu 2025-08-12 18:11:47 +08:00 committed by GitHub
parent 751d5f175c
commit b4b1185af3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 76 additions and 20 deletions

View File

@@ -1,7 +1,12 @@
 import subprocess
 import pytest
-from cuda import cuda, nvrtc
+try:
+    from cuda.bindings import driver as cuda
+    from cuda.bindings import nvrtc
+except ImportError:
+    from cuda import cuda, nvrtc
 def ASSERT_DRV(err):

View File

@@ -3,7 +3,7 @@
 accelerate>=0.25.0
 build
 colored
-cuda-python # Do not override the custom version of cuda-python installed in the NGC PyTorch image.
+cuda-python>=12,<13
 diffusers>=0.27.0
 lark
 mpi4py

View File

@@ -17,17 +17,20 @@ import struct
 import sys
 from typing import List, Tuple
-from cuda import cuda, cudart
-from cuda.cudart import cudaError_t
+try:
+    from cuda.bindings import driver as cuda
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cuda, cudart
 from ._utils import mpi_comm
 from .logger import logger
 from .mapping import Mapping
-def _raise_if_error(error: cudaError_t | cuda.CUresult):
-    if isinstance(error, cudaError_t):
-        if error != cudaError_t.cudaSuccess:
+def _raise_if_error(error: cudart.cudaError_t | cuda.CUresult):
+    if isinstance(error, cudart.cudaError_t):
+        if error != cudart.cudaError_t.cudaSuccess:
             raise RuntimeError(f"CUDA Runtime API error: {repr(error)}")
     if isinstance(error, cuda.CUresult):
         if error != cuda.CUresult.CUDA_SUCCESS:

View File

@@ -18,7 +18,11 @@ from dataclasses import dataclass
 import pynvml
 import torch
-from cuda import cuda
+try:
+    from cuda.bindings import driver as cuda
+except ImportError:
+    from cuda import cuda
 from ._dlpack_utils import pack_strided_memory
 from ._utils import mpi_comm

View File

@@ -5,7 +5,11 @@ from typing import Dict, Tuple, Union
 import pynvml
 import torch
-from cuda import cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 from tensorrt_llm._utils import DictConversion
 from tensorrt_llm.logger import logger

View File

@@ -29,7 +29,10 @@ import numpy as np
 import torch
 import tensorrt as trt
 # isort: on
-from cuda import cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 from tensorrt_llm.runtime.memory_pools.memory_pools_allocator import \
     MemoryPoolsAllocator

View File

@@ -13,7 +13,12 @@ import math
 from typing import Optional, Tuple
 import torch.nn.functional as F
-from cuda import cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 from huggingface_hub import hf_hub_download
 from PIL import Image, UnidentifiedImageError
 from safetensors import safe_open

View File

@@ -24,7 +24,11 @@ import sys
 import psutil
 import pynvml
-from cuda import cuda
+try:
+    from cuda.bindings import driver as cuda
+except ImportError:
+    from cuda import cuda
 # Logger
 logger = logging.getLogger(__name__)

View File

@@ -18,7 +18,10 @@ from argparse import ArgumentParser
 # isort: off
 import torch
 # isort: on
-from cuda import cuda, cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 import tensorrt_llm as tllm
 from tensorrt_llm import Mapping, Tensor

View File

@@ -7,7 +7,11 @@ import time
 import traceback
 import tensorrt as trt
-from cuda import cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 import tensorrt_llm
 from tensorrt_llm import (AutoConfig, AutoModelForCausalLM, BuildConfig,

View File

@@ -34,7 +34,10 @@ MPI.pickle.__init__(
 def run_single_rank(dtype, strategy, message_size):
     import numpy as np
     import torch
-    from cuda import cuda
+    try:
+        from cuda.bindings import driver as cuda
+    except ImportError:
+        from cuda import cuda
     import tensorrt_llm
     from tensorrt_llm._torch.distributed import AllReduce, AllReduceStrategy

View File

@@ -21,7 +21,10 @@ import pytest
 import torch
 # isort: on
-from cuda import cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 from parameterized import parameterized
 from utils.util import create_session, run_session, unittest_name_func

View File

@@ -21,7 +21,10 @@ import pytest
 import torch
 # isort: on
-from cuda import cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 from parameterized import parameterized
 from utils.util import create_session, run_session, unittest_name_func

View File

@@ -21,7 +21,10 @@ import pytest
 import torch
 # isort: on
-from cuda import cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 from parameterized import parameterized
 from utils.util import create_session, run_session, unittest_name_func

View File

@@ -21,7 +21,10 @@ import pytest
 import torch
 # isort: on
-from cuda import cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 from parameterized import parameterized
 from utils.util import create_session, run_session, unittest_name_func

View File

@@ -7,7 +7,13 @@ import pynvml
 import pytest
 import tensorrt as trt
 import torch
-from cuda import cuda, nvrtc
+try:
+    from cuda.bindings import driver as cuda
+    from cuda.bindings import nvrtc
+except ImportError:
+    from cuda import cuda, nvrtc
 from parameterized import parameterized
 import tensorrt_llm