mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
[https://nvbugs/5450855][fix] Cherry pick #6700 and #6702 from main (#6808)
Signed-off-by: Yiqing Yan <yiqingy@nvidia.com>
Signed-off-by: Yanchao Lu <yanchaol@nvidia.com>
Co-authored-by: Yiqing Yan <yiqingy@nvidia.com>
This commit is contained in:
parent
751d5f175c
commit
b4b1185af3
@ -1,7 +1,12 @@
|
||||
import subprocess
|
||||
|
||||
import pytest
|
||||
from cuda import cuda, nvrtc
|
||||
|
||||
try:
|
||||
from cuda.bindings import driver as cuda
|
||||
from cuda.bindings import nvrtc
|
||||
except ImportError:
|
||||
from cuda import cuda, nvrtc
|
||||
|
||||
|
||||
def ASSERT_DRV(err):
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
accelerate>=0.25.0
|
||||
build
|
||||
colored
|
||||
cuda-python # Do not override the custom version of cuda-python installed in the NGC PyTorch image.
|
||||
cuda-python>=12,<13
|
||||
diffusers>=0.27.0
|
||||
lark
|
||||
mpi4py
|
||||
|
||||
@ -17,17 +17,20 @@ import struct
|
||||
import sys
|
||||
from typing import List, Tuple
|
||||
|
||||
from cuda import cuda, cudart
|
||||
from cuda.cudart import cudaError_t
|
||||
try:
|
||||
from cuda.bindings import driver as cuda
|
||||
from cuda.bindings import runtime as cudart
|
||||
except ImportError:
|
||||
from cuda import cuda, cudart
|
||||
|
||||
from ._utils import mpi_comm
|
||||
from .logger import logger
|
||||
from .mapping import Mapping
|
||||
|
||||
|
||||
def _raise_if_error(error: cudaError_t | cuda.CUresult):
|
||||
if isinstance(error, cudaError_t):
|
||||
if error != cudaError_t.cudaSuccess:
|
||||
def _raise_if_error(error: cudart.cudaError_t | cuda.CUresult):
|
||||
if isinstance(error, cudart.cudaError_t):
|
||||
if error != cudart.cudaError_t.cudaSuccess:
|
||||
raise RuntimeError(f"CUDA Runtime API error: {repr(error)}")
|
||||
if isinstance(error, cuda.CUresult):
|
||||
if error != cuda.CUresult.CUDA_SUCCESS:
|
||||
|
||||
@ -18,7 +18,11 @@ from dataclasses import dataclass
|
||||
|
||||
import pynvml
|
||||
import torch
|
||||
from cuda import cuda
|
||||
|
||||
try:
|
||||
from cuda.bindings import driver as cuda
|
||||
except ImportError:
|
||||
from cuda import cuda
|
||||
|
||||
from ._dlpack_utils import pack_strided_memory
|
||||
from ._utils import mpi_comm
|
||||
|
||||
@ -5,7 +5,11 @@ from typing import Dict, Tuple, Union
|
||||
|
||||
import pynvml
|
||||
import torch
|
||||
from cuda import cudart
|
||||
|
||||
try:
|
||||
from cuda.bindings import runtime as cudart
|
||||
except ImportError:
|
||||
from cuda import cudart
|
||||
|
||||
from tensorrt_llm._utils import DictConversion
|
||||
from tensorrt_llm.logger import logger
|
||||
|
||||
@ -29,7 +29,10 @@ import numpy as np
|
||||
import torch
|
||||
import tensorrt as trt
|
||||
# isort: on
|
||||
from cuda import cudart
|
||||
try:
|
||||
from cuda.bindings import runtime as cudart
|
||||
except ImportError:
|
||||
from cuda import cudart
|
||||
|
||||
from tensorrt_llm.runtime.memory_pools.memory_pools_allocator import \
|
||||
MemoryPoolsAllocator
|
||||
|
||||
@ -13,7 +13,12 @@ import math
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import torch.nn.functional as F
|
||||
from cuda import cudart
|
||||
|
||||
try:
|
||||
from cuda.bindings import runtime as cudart
|
||||
except ImportError:
|
||||
from cuda import cudart
|
||||
|
||||
from huggingface_hub import hf_hub_download
|
||||
from PIL import Image, UnidentifiedImageError
|
||||
from safetensors import safe_open
|
||||
|
||||
@ -24,7 +24,11 @@ import sys
|
||||
|
||||
import psutil
|
||||
import pynvml
|
||||
from cuda import cuda
|
||||
|
||||
try:
|
||||
from cuda.bindings import driver as cuda
|
||||
except ImportError:
|
||||
from cuda import cuda
|
||||
|
||||
# Logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -18,7 +18,10 @@ from argparse import ArgumentParser
|
||||
# isort: off
|
||||
import torch
|
||||
# isort: on
|
||||
from cuda import cuda, cudart
|
||||
try:
|
||||
from cuda.bindings import runtime as cudart
|
||||
except ImportError:
|
||||
from cuda import cudart
|
||||
|
||||
import tensorrt_llm as tllm
|
||||
from tensorrt_llm import Mapping, Tensor
|
||||
|
||||
@ -7,7 +7,11 @@ import time
|
||||
import traceback
|
||||
|
||||
import tensorrt as trt
|
||||
from cuda import cudart
|
||||
|
||||
try:
|
||||
from cuda.bindings import runtime as cudart
|
||||
except ImportError:
|
||||
from cuda import cudart
|
||||
|
||||
import tensorrt_llm
|
||||
from tensorrt_llm import (AutoConfig, AutoModelForCausalLM, BuildConfig,
|
||||
|
||||
@ -34,7 +34,10 @@ MPI.pickle.__init__(
|
||||
def run_single_rank(dtype, strategy, message_size):
|
||||
import numpy as np
|
||||
import torch
|
||||
from cuda import cuda
|
||||
try:
|
||||
from cuda.bindings import driver as cuda
|
||||
except ImportError:
|
||||
from cuda import cuda
|
||||
|
||||
import tensorrt_llm
|
||||
from tensorrt_llm._torch.distributed import AllReduce, AllReduceStrategy
|
||||
|
||||
@ -21,7 +21,10 @@ import pytest
|
||||
import torch
|
||||
# isort: on
|
||||
|
||||
from cuda import cudart
|
||||
try:
|
||||
from cuda.bindings import runtime as cudart
|
||||
except ImportError:
|
||||
from cuda import cudart
|
||||
from parameterized import parameterized
|
||||
from utils.util import create_session, run_session, unittest_name_func
|
||||
|
||||
|
||||
@ -21,7 +21,10 @@ import pytest
|
||||
import torch
|
||||
# isort: on
|
||||
|
||||
from cuda import cudart
|
||||
try:
|
||||
from cuda.bindings import runtime as cudart
|
||||
except ImportError:
|
||||
from cuda import cudart
|
||||
from parameterized import parameterized
|
||||
from utils.util import create_session, run_session, unittest_name_func
|
||||
|
||||
|
||||
@ -21,7 +21,10 @@ import pytest
|
||||
import torch
|
||||
# isort: on
|
||||
|
||||
from cuda import cudart
|
||||
try:
|
||||
from cuda.bindings import runtime as cudart
|
||||
except ImportError:
|
||||
from cuda import cudart
|
||||
from parameterized import parameterized
|
||||
from utils.util import create_session, run_session, unittest_name_func
|
||||
|
||||
|
||||
@ -21,7 +21,10 @@ import pytest
|
||||
import torch
|
||||
# isort: on
|
||||
|
||||
from cuda import cudart
|
||||
try:
|
||||
from cuda.bindings import runtime as cudart
|
||||
except ImportError:
|
||||
from cuda import cudart
|
||||
from parameterized import parameterized
|
||||
from utils.util import create_session, run_session, unittest_name_func
|
||||
|
||||
|
||||
@ -7,7 +7,13 @@ import pynvml
|
||||
import pytest
|
||||
import tensorrt as trt
|
||||
import torch
|
||||
from cuda import cuda, nvrtc
|
||||
|
||||
try:
|
||||
from cuda.bindings import driver as cuda
|
||||
from cuda.bindings import nvrtc
|
||||
except ImportError:
|
||||
from cuda import cuda, nvrtc
|
||||
|
||||
from parameterized import parameterized
|
||||
|
||||
import tensorrt_llm
|
||||
|
||||
Loading…
Reference in New Issue
Block a user