mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-27 14:13:34 +08:00
12 lines
398 B
Python
12 lines
398 B
Python
from test_llm_models import llm_test_harness, qwen2_model_path, sampling_params
|
|
from utils.util import skip_single_gpu
|
|
|
|
|
|
# NOTE(review): skip_single_gpu comes from utils.util; presumably it skips
# this test on hosts without multiple GPUs — confirm against its definition.
@skip_single_gpu
def test_llm_qwen2_tp2():
    """Run ``llm_test_harness`` on the Qwen2 model path with ``tensor_parallel_size=2``.

    A single prompt and a single reference string are handed to the harness,
    together with the ``sampling_params`` imported from ``test_llm_models``.
    """
    prompts = ['A B C']
    expected_outputs = ['D E F G H I J K L M']

    llm_test_harness(
        qwen2_model_path,
        inputs=prompts,
        references=expected_outputs,
        sampling_params=sampling_params,
        tensor_parallel_size=2,
    )
|