TensorRT-LLMs/tests/integration/defs/cpp/conftest.py
Omer Ullman Argov bdc8dfebc3
[fix][ci] dont build wheel for cpp tests (#5443)
Signed-off-by: Omer Ullman Argov <118735753+omera-nv@users.noreply.github.com>
2025-06-26 00:13:47 +03:00

245 lines
6.5 KiB
Python

import glob
import logging as _logger
import os as _os
import pathlib as _pl
import shutil
import sys as _sys
import time
import defs.cpp.cpp_common as _cpp
import pytest
# Locate the repository-level scripts/ directory (four levels above this
# file's parent) and make it importable, so the wheel build entry point can
# be reused below to compile the C++ test targets.
build_script_dir = _pl.Path(
__file__).parent.resolve().parent.parent.parent.parent / "scripts"
assert build_script_dir.is_dir()
_sys.path.append(str(build_script_dir))
# These imports must happen after the sys.path append above.
from build_wheel import main as build_trt_llm
from defs.conftest import llm_models_root
@pytest.fixture(scope="session")
def build_dir():
    """Session-scoped path to the C++ build directory."""
    located = _cpp.find_build_dir()
    return located
@pytest.fixture(scope="session")
def cpp_resources_dir():
    """Relative path to the cpp test resources tree."""
    # Path() joins all segment arguments, equivalent to chained `/`.
    return _pl.Path("cpp", "tests", "resources")
@pytest.fixture(scope="session")
def model_cache():
    """Root of the shared LLM model cache.

    May be falsy when no cache is configured (see model_cache_arg).
    """
    return llm_models_root()
@pytest.fixture(scope="session")
def model_cache_arg(model_cache):
    """CLI arguments forwarding the model cache location, if one is set."""
    if not model_cache:
        return []
    return ["--model_cache", model_cache]
@pytest.fixture(scope="session")
def python_exe():
    """Path of the Python interpreter running this test session."""
    exe = _sys.executable
    return exe
@pytest.fixture(scope="session")
def root_dir():
    """Session-scoped path to the repository root."""
    located = _cpp.find_root_dir()
    return located
@pytest.fixture(scope="session")
def lora_setup(root_dir, cpp_resources_dir, python_exe):
    """Generate the LoRA weight fixtures consumed by the C++ tests.

    Invokes generate_test_lora_weights.py once per configuration (TP1,
    TP2, multi-lora TP2, gpt2-sized TP1, and two prefetch-task layouts),
    writing the outputs under the cpp data directory.
    """
    cpp_script_dir = cpp_resources_dir / "scripts"
    cpp_data_dir = cpp_resources_dir / "data"
    generator = f"{cpp_script_dir}/generate_test_lora_weights.py"

    # Each entry lists the flags appended to the common generator command.
    weight_configs = [
        [
            f"--out-dir={cpp_data_dir}/lora-test-weights-tp1",
            "--tp-size=1",
        ],
        [
            f"--out-dir={cpp_data_dir}/lora-test-weights-tp2",
            "--tp-size=2",
        ],
        [
            f"--out-dir={cpp_data_dir}/multi_lora",
            "--tp-size=2",
            "--num-loras=128",
        ],
        [
            f"--out-dir={cpp_data_dir}/lora-test-weights-gpt2-tp1",
            "--tp-size=1",
            "--hidden-size=768",
            "--num-layers=12",
            "--config-ids-filter=0",
            "--no-generate-cache-pages",
        ],
        [
            f"--out-dir={cpp_data_dir}/lora_prefetch/3",
            "--target-file-name=model.lora_weights.npy",
            "--config-file-name=model.lora_config.npy",
        ],
        [
            f"--out-dir={cpp_data_dir}/lora_prefetch/5",
            "--target-file-name=model.lora_weights.npy",
            "--config-file-name=model.lora_config.npy",
        ],
    ]

    for extra_flags in weight_configs:
        _cpp.run_command([python_exe, generator, *extra_flags],
                         cwd=root_dir,
                         timeout=100)
@pytest.fixture(scope="session")
def install_additional_requirements(python_exe, root_dir):
    """Return a callable that pip-installs extra deps for specific models.

    Only "mamba" and "recurrentgemma" need additional packages; any other
    model name is a no-op.
    """
    # Maps model name -> arguments appended to `python -m pip install`.
    extra_pip_args = {
        "mamba": ["transformers>=4.39.0"],
        "recurrentgemma": [
            "-r",
            "examples/models/core/recurrentgemma/requirements.txt",
        ],
    }

    def _install(model_name: str):
        pip_args = extra_pip_args.get(model_name)
        if pip_args is not None:
            _cpp.run_command(
                [python_exe, "-m", "pip", "install", *pip_args],
                cwd=root_dir,
                env=_os.environ,
                timeout=300,
            )

    return _install
@pytest.fixture(scope="session")
def build_google_tests(request, build_dir):
    """Compile TRT-LLM native libraries and the google-tests target.

    Indirectly parametrized with the CUDA architecture to build for.
    """
    cuda_arch = f"{request.param}-real"
    print(f"Using CUDA arch: {cuda_arch}")
    # Build the C++ sources only; the cpp tests do not need a wheel.
    build_trt_llm(cuda_architectures=cuda_arch,
                  job_count=12,
                  use_ccache=True,
                  clean=True,
                  trt_root="/usr/local/tensorrt",
                  nixl_root="/opt/nvidia/nvda_nixl",
                  skip_building_wheel=True)
    _cpp.run_command(
        [
            "cmake", "--build", ".", "--config", "Release", "-j",
            "--target", "google-tests"
        ],
        cwd=build_dir,
        timeout=300)
@pytest.fixture(scope="session")
def build_benchmarks(build_google_tests, build_dir):
    """Build the C++ benchmarks target (after google-tests are built)."""
    _cpp.run_command(
        [
            "cmake", "--build", ".", "--config", "Release", "-j",
            "--target", "benchmarks"
        ],
        cwd=build_dir,
        timeout=300)
@pytest.fixture(scope="session")
def prepare_model(
    root_dir,
    cpp_resources_dir,
    python_exe,
    model_cache_arg,
    install_additional_requirements,
):
    """Return a callable that prepares a named model for the cpp tests."""

    def _prepare(model_name: str, run_fp8=False):
        # NOTE(review): run_fp8 is accepted for interface compatibility
        # but is not used in this body.
        install_additional_requirements(model_name)
        started = time.time()
        _cpp.prepare_model_tests(
            model_name=model_name,
            python_exe=python_exe,
            root_dir=root_dir,
            resources_dir=cpp_resources_dir,
            model_cache_arg=model_cache_arg,
        )
        duration = time.time() - started
        print(f"Built model: {model_name}")
        print(f"Duration: {duration} seconds")

    return _prepare
@pytest.fixture(scope="function", autouse=True)
def keep_log_files(llm_root):
    "Backup previous cpp test results when run multiple ctest"
    results_dir = f"{llm_root}/cpp/build"
    yield
    # Teardown: copy any ctest XML reports aside so a subsequent run
    # cannot clobber them.
    backup_dir = f"{llm_root}/cpp/build_backup"
    _os.makedirs(backup_dir, exist_ok=True)
    xml_files = glob.glob(f"{results_dir}/*.xml")
    if not xml_files:
        _logger.info("No XML files found in the build directory.")
        return
    for xml_file in xml_files:
        try:
            shutil.copy(xml_file, backup_dir)
        except Exception as e:
            _logger.error(f"Error copying {xml_file}: {str(e)}")
        else:
            _logger.info(f"Copied {xml_file} to {backup_dir}")