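"""Multi-GPU tests for the LLM API: each case loads a small model with
tensor parallelism across two GPUs and checks its generated continuation
against a fixed reference string."""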

import os
import sys

try:
    from .test_llm_models import (baichuan2_7b_model_path, falcon_model_path,
                                  gemma_2b_model_path, gptj_model_path,
                                  llm_test_harness, qwen2_model_path,
                                  sampling_params)
except ImportError:
    from test_llm_models import (baichuan2_7b_model_path, falcon_model_path,
                                 gemma_2b_model_path, gptj_model_path,
                                 llm_test_harness, qwen2_model_path,
                                 sampling_params)

sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from utils.util import skip_single_gpu
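
# llm_test_harness (defined in test_llm_models) is assumed to build an LLM
# from the given checkpoint, generate a continuation for each prompt in
# `inputs`, and assert the results match `references`; extra keyword
# arguments such as tensor_parallel_size are forwarded to the LLM
# constructor.
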
@skip_single_gpu
def test_llm_gptj_tp2():
    llm_test_harness(gptj_model_path,
                     inputs=['A B C'],
                     references=['D E F G H I J K L M'],
                     sampling_params=sampling_params,
                     tensor_parallel_size=2)


@skip_single_gpu
def test_llm_falcon_tp2():
    llm_test_harness(falcon_model_path,
                     inputs=['A B C'],
                     references=['D E F G H I J K L M'],
                     sampling_params=sampling_params,
                     tensor_parallel_size=2)


@skip_single_gpu
def test_llm_baichuan2_7b_tp2():
    llm_test_harness(baichuan2_7b_model_path,
                     inputs=['A B C'],
                     references=['D E F G H I J K L M'],
                     sampling_params=sampling_params,
                     tensor_parallel_size=2,
                     trust_remote_code=True)


@skip_single_gpu
def test_llm_gemma_2b_tp2():
    llm_test_harness(gemma_2b_model_path,
                     inputs=['A B C'],
                     references=['D E F G H I J K L M'],
                     sampling_params=sampling_params,
                     tensor_parallel_size=2)


@skip_single_gpu
def test_llm_qwen2_tp2():
    llm_test_harness(qwen2_model_path,
                     inputs=['A B C'],
                     references=['D E F G H I J K L M'],
                     sampling_params=sampling_params,
                     tensor_parallel_size=2)
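

# Minimal convenience entry point (a sketch, not part of the CI flow): the
# tests above are normally collected by pytest, but the file can also be
# run directly on a multi-GPU machine.
if __name__ == '__main__':
    import pytest
    pytest.main([__file__, '-v'])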