# Multi-GPU (tensor_parallel_size=2) sanity tests for the LLM API.
# Each case is skipped on single-GPU machines via the skip_single_gpu marker.
from test_llm_models import (baichuan2_7b_model_path, falcon_model_path,
                             gemma_2b_model_path, gptj_model_path,
                             llm_test_harness, qwen2_model_path,
                             sampling_params)
from utils.util import skip_single_gpu


@skip_single_gpu
def test_llm_gptj_tp2():
    llm_test_harness(gptj_model_path,
                     inputs=["A B C"],
                     references=["D E F G H I J K L M"],
                     sampling_params=sampling_params,
                     tensor_parallel_size=2)


@skip_single_gpu
def test_llm_falcon_tp2():
    llm_test_harness(falcon_model_path,
                     inputs=['A B C'],
                     references=['D E F G H I J K L M'],
                     sampling_params=sampling_params,
                     tensor_parallel_size=2)


@skip_single_gpu
def test_llm_baichuan2_7b_tp2():
    # Baichuan2 checkpoints ship custom modeling code, so loading requires
    # trust_remote_code=True.
    llm_test_harness(baichuan2_7b_model_path,
                     inputs=['A B C'],
                     references=['D E F G H I J K L M'],
                     sampling_params=sampling_params,
                     tensor_parallel_size=2,
                     trust_remote_code=True)


@skip_single_gpu
def test_llm_gemma_2b_tp2():
    llm_test_harness(gemma_2b_model_path,
                     inputs=['A B C'],
                     references=['D E F G H I J K L M'],
                     sampling_params=sampling_params,
                     tensor_parallel_size=2)


@skip_single_gpu
def test_llm_qwen2_tp2():
    llm_test_harness(qwen2_model_path,
                     inputs=['A B C'],
                     references=['D E F G H I J K L M'],
                     sampling_params=sampling_params,
                     tensor_parallel_size=2)