# TensorRT-LLM/tests/integration/defs/conftest.py
# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-
import datetime
import json
import os
import re
import shutil
import subprocess as sp
import tempfile
import time
import urllib.request
from functools import wraps
from pathlib import Path
import defs.ci_profiler
import psutil
import pytest
import yaml
from tensorrt_llm.bindings import ipc_nvls_supported
from .perf.gpu_clock_lock import GPUClockLock
from .perf.session_data_writer import SessionDataWriter
from .test_list_parser import (TestCorrectionMode, apply_waives,
get_test_name_corrections_v2, handle_corrections,
modify_by_test_list, preprocess_test_list_lines)
from .trt_test_alternative import (call, check_output, exists, is_windows,
is_wsl, makedirs, print_info, print_warning,
wsl_to_win_path)
try:
from llm import trt_environment
except ImportError:
trt_environment = None
# TODO: turn off this when the nightly storage issue is resolved.
DEBUG_CI_STORAGE = os.environ.get("DEBUG_CI_STORAGE", False)
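# Note: os.environ.get returns a string when the variable is set, so any non-empty
# value (even "0") enables the debug storage prints below.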
GITLAB_API_USER = os.environ.get("GITLAB_API_USER")
GITLAB_API_TOKEN = os.environ.get("GITLAB_API_TOKEN")
EVALTOOL_REPO_URL = os.environ.get("EVALTOOL_REPO_URL")
LLM_GATE_WAY_CLIENT_ID = os.environ.get("LLM_GATE_WAY_CLIENT_ID")
LLM_GATE_WAY_TOKEN = os.environ.get("LLM_GATE_WAY_TOKEN")
def print_storage_usage(path, tag, capfd):
if DEBUG_CI_STORAGE:
stat = shutil.disk_usage(path)
with capfd.disabled():
print_info(
f"\nUsage of {path} {stat} @{tag}, used in GB: {stat.used/(2**30)}"
)
def wget(url, out):
filename = os.path.basename(url)
os.makedirs(out, exist_ok=True)
urllib.request.urlretrieve(url, os.path.join(out, filename))
def llm_models_root() -> str:
    '''Return the LLM_MODELS_ROOT path from the environment, falling back to the
    default /scratch.trt_llm_data/llm-models location when the variable is not set.
    '''
DEFAULT_LLM_MODEL_ROOT = os.path.join("/scratch.trt_llm_data", "llm-models")
LLM_MODELS_ROOT = os.environ.get("LLM_MODELS_ROOT", DEFAULT_LLM_MODEL_ROOT)
return LLM_MODELS_ROOT
def tests_path() -> Path:
return (Path(os.path.dirname(__file__)) / "../..").resolve()
def unittest_path() -> Path:
return tests_path() / "unittest"
def integration_path() -> Path:
return tests_path() / "integration"
def cached_in_llm_models_root(path_relative_to_llm_models_root,
fail_if_path_is_invalid=False):
'''
Use this decorator to declare a cached path in the LLM_MODELS_ROOT directory.
    This decorator is intended to be used with pytest.fixture functions that prepare and return a data path for some tests.
    The cache is only queried when llm_models_root() does not return None, and the cache is skipped otherwise.
    When the cache is queried and the specified path does not exist, the function:
    - Triggers an assertion failure when fail_if_path_is_invalid is True,
    - Ignores the invalid path and falls back to calling the fixture otherwise.
    The purpose of `fail_if_path_is_invalid` is the following:
    - If you submit a test and the data is not in the cached NFS LLM_MODELS_ROOT dir yet, you can use `fail_if_path_is_invalid=False` (the default).
      In that case, the fixture will use the fallback path and ignore the cache miss in the CI. After submitting the data to the cached NFS LLM_MODELS_ROOT dir,
      your test will automatically pick up the cached data.
    - If your data is known to always be in LLM_MODELS_ROOT and you want the test to fail loudly when it misses in the cache,
      specify fail_if_path_is_invalid=True to force the failure. This is useful when a cache miss would cause a big performance drop for the CI jobs.
Example:
If you have a fixture which downloads the SantaCoder repo and returns its path for one SantaCoder test, you can do the following:
@pytest.fixture(scope="session")
def llm_gpt2_santacoder_model_root(llm_venv):
workspace = llm_venv.get_working_directory()
gpt2_santacoder_model_root = os.path.join(workspace, "santacoder")
call(
f"git clone https://huggingface.co/bigcode/santacoder {gpt2_santacoder_model_root}",
shell=True)
return gpt2_santacoder_model_root
    At some point, if you decide to cache SantaCoder in LLM_MODELS_ROOT, you can decorate the fixture to make the test
    use the ${LLM_MODELS_ROOT}/santacoder cached directory. You can upload SantaCoder to that location before or after submitting
    this code since there is a fallback path to clone the repo if it is not found in the cache.
@pytest.fixture(scope="session")
@cached_in_llm_models_root("santacoder")
def llm_gpt2_santacoder_model_root(llm_venv):
... keep the original code
'''
def wrapper(f):
@wraps(f)
def decorated(*args, **kwargs):
if llm_models_root() is not None:
cached_dir = f"{llm_models_root()}/{path_relative_to_llm_models_root}"
if os.path.exists(cached_dir):
return cached_dir
elif fail_if_path_is_invalid:
assert False, f"{cached_dir} does not exist, and fail_if_path_is_invalid is True, please check the cache directory"
return f(*args, **kwargs)
return decorated
return wrapper
# Fixture indicating whether the current pipeline is running in the TRT environment.
@pytest.fixture(scope="session")
def is_trt_environment():
return trt_environment is not None
# Helper function to get llm_root. Do not define it as a fixture so that this
# function can be used during the test collection phase.
def get_llm_root(trt_config=None, gitlab_token=None):
if trt_environment:
return trt_environment.setup_tensorrt_llm_repo(trt_config, gitlab_token)
llm_repo_root = os.environ.get("LLM_ROOT", None)
if llm_repo_root is None:
llm_repo_root = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
print_warning(
f"The LLM_ROOT env var is not defined! Using {llm_repo_root} as LLM_ROOT."
)
return llm_repo_root
@pytest.fixture(scope="session")
def llm_root():
return get_llm_root()
@pytest.fixture(scope="session")
def llm_datasets_root() -> str:
return os.path.join(llm_models_root(), "datasets")
@pytest.fixture(scope="session")
def llm_rouge_root() -> str:
return os.path.join(llm_models_root(), "rouge")
@pytest.fixture(scope="module")
def bert_example_root(llm_root):
"Get bert example root"
example_root = os.path.join(llm_root, "examples", "bert")
return example_root
@pytest.fixture(scope="module")
def enc_dec_example_root(llm_root):
"Get encoder-decoder example root"
example_root = os.path.join(llm_root, "examples", "enc_dec")
return example_root
@pytest.fixture(scope="module")
def whisper_example_root(llm_root, llm_venv):
"Get whisper example root"
example_root = os.path.join(llm_root, "examples", "whisper")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="module")
def opt_example_root(llm_root, llm_venv):
"Get opt example root"
example_root = os.path.join(llm_root, "examples", "models", "contrib",
"opt")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="module")
def llama_example_root(llm_root, llm_venv):
"Get llama example root"
example_root = os.path.join(llm_root, "examples", "llama")
try:
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
    except Exception:
        print_warning("Failed to install llama example requirements!")
return example_root
@pytest.fixture(scope="module")
def llmapi_example_root(llm_root, llm_venv):
"Get llm api example root"
example_root = os.path.join(llm_root, "examples", "llm-api")
return example_root
@pytest.fixture(scope="module")
def disaggregated_example_root(llm_root, llm_venv):
"Get disaggregated example root"
example_root = os.path.join(llm_root, "examples", "disaggregated")
return example_root
@pytest.fixture(scope="module")
def gemma_example_root(llm_root, llm_venv):
"Get gemma example root"
example_root = os.path.join(llm_root, "examples", "gemma")
    # https://nvbugs/4559583 The JAX dependency broke the entire pipeline in the TRT container
    # due to a dependency incompatibility with torch, which forced reinstalling everything
    # and caused the pipeline to fail. We manually install the gemma dependencies as a WAR.
llm_venv.run_cmd(["-m", "pip", "install", "safetensors~=0.4.1", "nltk"])
    # Install pinned JAX/jaxlib manually (with --no-deps) because its dependencies break the environment.
import platform
google_extension = [
"-f",
"https://storage.googleapis.com/jax-releases/jax_cuda_releases.html"
]
# WAR the new posting of "nvidia-cudnn-cu12~=9.0".
# "jax[cuda12_pip]~=0.4.19" specifies "nvidia-cudnn-cu12>=8.9" but actually requires "nvidia-cudnn-cu12~=8.9".
if "x86_64" in platform.machine():
llm_venv.run_cmd(["-m", "pip", "install", "nvidia-cudnn-cu12~=8.9"])
if "Windows" in platform.system():
llm_venv.run_cmd([
"-m", "pip", "install", "jax~=0.4.19", "jaxlib~=0.4.19", "--no-deps"
] + google_extension)
else:
llm_venv.run_cmd([
"-m", "pip", "install", "jax[cuda12_pip]~=0.4.19",
"jaxlib[cuda12_pip]~=0.4.19", "--no-deps"
] + google_extension)
llm_venv.run_cmd(["-m", "pip", "install", "flax~=0.8.0"])
return example_root
@pytest.fixture(scope="function")
def gemma_model_root(request):
"Get gemma model root"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
if hasattr(request, "param"):
gemma_model_root = os.path.join(models_root, f"gemma/{request.param}")
assert exists(gemma_model_root), f"{gemma_model_root} does not exist!"
return gemma_model_root
@pytest.fixture(scope="function")
def minitron_model_root(request):
"Get minitron model root"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
if hasattr(request, "param"):
assert request.param == "4b"
minitron_model_root = os.path.join(models_root,
"nemotron/Minitron-4B-Base")
assert exists(minitron_model_root), f"{minitron_model_root} does not exist!"
return minitron_model_root
@pytest.fixture(scope="function")
def mistral_nemo_model_root(request):
"Get Mistral Nemo model root"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
if hasattr(request, "param"):
assert request.param == "Mistral-Nemo-12b-Base"
mistral_nemo_model_root = os.path.join(models_root,
"Mistral-Nemo-Base-2407")
assert exists(
mistral_nemo_model_root), f"{mistral_nemo_model_root} does not exist!"
return mistral_nemo_model_root
@pytest.fixture(scope="function")
def mistral_nemo_minitron_model_root(request):
"Get Mistral Nemo Minitron model root"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
if hasattr(request, "param"):
assert request.param == "Mistral-NeMo-Minitron-8B-Instruct"
mistral_nemo_minitron_model_root = os.path.join(
models_root, "Mistral-NeMo-Minitron-8B-Instruct")
assert exists(mistral_nemo_minitron_model_root
), f"{mistral_nemo_minitron_model_root} does not exist!"
return mistral_nemo_minitron_model_root
@pytest.fixture(scope="module")
def gpt_example_root(llm_root, llm_venv):
"Get gpt example root"
example_root = os.path.join(llm_root, "examples", "gpt")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="module")
def gptj_example_root(llm_root, llm_venv):
"Get gptj example root"
example_root = os.path.join(llm_root, "examples", "models", "contrib",
"gptj")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="module")
def glm_4_9b_example_root(llm_root, llm_venv):
"Get glm-4-9b example root"
example_root = os.path.join(llm_root, "examples", "glm-4-9b")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="module")
def exaone_example_root(llm_root, llm_venv):
"Get EXAONE example root"
example_root = os.path.join(llm_root, "examples", "exaone")
return example_root
@pytest.fixture(scope="function")
def llm_exaone_model_root(request) -> str:
"Get EXAONE model root"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
exaone_model_root = os.path.join(models_root, "exaone")
if hasattr(request, "param"):
if request.param == "exaone_3.0_7.8b_instruct":
exaone_model_root = os.path.join(models_root, "exaone")
elif request.param == "exaone_deep_2.4b":
exaone_model_root = os.path.join(models_root, "EXAONE-Deep-2.4B")
return exaone_model_root
@pytest.fixture(scope="module")
def falcon_example_root(llm_root, llm_venv):
"Get falcon example root"
example_root = os.path.join(llm_root, "examples", "models", "contrib",
"falcon")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="session")
def plugin_gen_path(llm_root):
"Path to the plugin_gen.py script"
return os.path.join(llm_root, "tensorrt_llm", "tools", "plugin_gen",
"plugin_gen.py")
@pytest.fixture(scope="module")
def internlm2_example_root(llm_root, llm_venv):
"Get internlm2 example root"
example_root = os.path.join(llm_root, "examples", "internlm2")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="module")
def qwen_example_root(llm_root, llm_venv):
"Get qwen example root"
example_root = os.path.join(llm_root, "examples", "qwen")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="module")
def draft_target_model_example_root(llm_root, llm_venv):
"Get Draft-Target-Model example root"
example_root = os.path.join(llm_root, "examples", "draft_target_model")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="module")
def prompt_lookup_example_root(llm_root, llm_venv):
"Get Prompt-Lookup example root"
example_root = os.path.join(llm_root, "examples", "prompt_lookup")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="module")
def medusa_example_root(llm_root, llm_venv):
"Get medusa example root"
example_root = os.path.join(llm_root, "examples", "medusa")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="module")
def redrafter_example_root(llm_root, llm_venv):
"Get ReDrafter example root"
example_root = os.path.join(llm_root, "examples", "redrafter")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="module")
def eagle_example_root(llm_root, llm_venv):
"Get EAGLE example root"
example_root = os.path.join(llm_root, "examples", "eagle")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="module")
def mamba_example_root(llm_root, llm_venv):
"Get mamba example root"
example_root = os.path.join(llm_root, "examples", "mamba")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
yield example_root
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(llm_root, "requirements.txt")
])
@pytest.fixture(scope="module")
def recurrentgemma_example_root(llm_root, llm_venv):
"Get recurrentgemma example root"
example_root = os.path.join(llm_root, "examples", "recurrentgemma")
# install requirements
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
yield example_root
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(llm_root, "requirements.txt")
])
@pytest.fixture(scope="module")
def nemotron_nas_example_root(llm_root, llm_venv):
example_root = os.path.join(llm_root, "examples", "nemotron_nas")
yield example_root
@pytest.fixture(scope="module")
def nemotron_example_root(llm_root, llm_venv):
"Get nemotron example root"
example_root = os.path.join(llm_root, "examples", "nemotron")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="module")
def commandr_example_root(llm_root, llm_venv):
"Get commandr example root"
example_root = os.path.join(llm_root, "examples", "commandr")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="module")
def deepseek_v2_example_root(llm_root, llm_venv):
"Get deepseek v2 example root"
example_root = os.path.join(llm_root, "examples", "models", "contrib",
"deepseek_v2")
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(example_root, "requirements.txt")
])
return example_root
@pytest.fixture(scope="function")
def deepseek_v3_model_root(request):
models_root = llm_models_root()
if (request.param == "DeepSeek-V3"):
deepseek_v3_model_root = os.path.join(models_root, "DeepSeek-V3")
elif (request.param == "DeepSeek-V3-Lite-bf16"):
deepseek_v3_model_root = os.path.join(models_root, "DeepSeek-V3-Lite",
"bf16")
elif (request.param == "DeepSeek-V3-Lite-fp8"):
deepseek_v3_model_root = os.path.join(models_root, "DeepSeek-V3-Lite",
"fp8")
elif (request.param == "DeepSeek-V3-Lite-nvfp4_moe_only"):
deepseek_v3_model_root = os.path.join(models_root, "DeepSeek-V3-Lite",
"nvfp4_moe_only")
assert exists(
deepseek_v3_model_root), f"{deepseek_v3_model_root} does not exist!"
return deepseek_v3_model_root
@pytest.fixture(scope="session")
def trt_performance_cache_name():
return "performance.cache"
@pytest.fixture(scope="session")
def trt_performance_cache_fpath(llm_venv, trt_performance_cache_name):
workspace = llm_venv.get_working_directory()
fpath = os.path.join(workspace, trt_performance_cache_name)
if is_wsl():
return wsl_to_win_path(fpath)
return fpath
# Get the executing perf case name
@pytest.fixture(autouse=True)
def perf_case_name(request):
return request.node.nodeid
@pytest.fixture(scope="session")
def output_dir(request):
output = request.config.getoption("--output-dir")
if output:
os.makedirs(str(output), exist_ok=True)
return output
@pytest.fixture(scope="session")
def trt_gpu_clock_lock(request):
"""
Fixture for the GPUClockLock, used to interface with pynvml to get system properties and to lock/monitor GPU clocks.
"""
gpu_list = get_gpu_device_list()
gpu_ids = [gpu.split()[1][:-1] for gpu in gpu_list] # Extract GPU IDs
gpu_ids_str = ",".join(gpu_ids)
gpu_clock_lock = GPUClockLock(
gpu_id=gpu_ids_str,
interval_ms=1000.0,
)
yield gpu_clock_lock
gpu_clock_lock.teardown()
@pytest.fixture(scope="session")
def llm_session_data_writer(request, trt_gpu_clock_lock, output_dir):
"""
Fixture for the SessionDataWriter, used to write session data to output directory.
"""
session_data_writer = SessionDataWriter(
log_output_directory=output_dir,
output_formats=request.config.getoption("--perf-log-formats"),
gpu_clock_lock=trt_gpu_clock_lock,
)
yield session_data_writer
session_data_writer.teardown()
@pytest.fixture(scope="session")
def custom_user_workspace(request):
return request.config.getoption("--workspace")
@pytest.fixture(scope="session")
def llm_venv(llm_root, custom_user_workspace):
workspace_dir = custom_user_workspace
subdir = datetime.datetime.now().strftime("ws-%Y-%m-%d-%H-%M-%S")
if workspace_dir is None:
workspace_dir = "llm-test-workspace"
workspace_dir = os.path.join(workspace_dir, subdir)
from defs.local_venv import PythonVenvRunnerImpl
return PythonVenvRunnerImpl("", "", "python3",
os.path.join(os.getcwd(), workspace_dir))
@pytest.fixture(scope="session")
@cached_in_llm_models_root("gpt-next/megatron_converted_843m_tp1_pp1.nemo",
True)
def gpt_next_root():
"get gpt-next/megatron_converted_843m_tp1_pp1.nemo"
raise RuntimeError("megatron_converted_843m_tp1_pp1.nemo must be cached")
@pytest.fixture(scope="function")
def bert_model_root(hf_bert_model_root):
"Get bert model root"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
bert_model_name = hf_bert_model_root
bert_model_root = os.path.join(models_root, bert_model_name)
assert os.path.exists(
bert_model_root
), f"{bert_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
return (bert_model_name, bert_model_root)
@pytest.fixture(scope="function")
def enc_dec_model_root(request):
"Get enc-dec model root"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
tllm_model_name = request.param
if not "wmt" in tllm_model_name:
# HuggingFace root
enc_dec_model_root = os.path.join(models_root, tllm_model_name)
else:
# FairSeq root
enc_dec_model_root = os.path.join(models_root, "fairseq-models",
tllm_model_name)
assert os.path.exists(
enc_dec_model_root
), f"{enc_dec_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
return (tllm_model_name, enc_dec_model_root)
@pytest.fixture(scope="function")
def whisper_model_root(request):
"Get whisper model root"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
assert request.param in [
"large-v2", "large-v3"
], "whisper only supports large-v2 or large-v3 for now"
tllm_model_name = request.param
whisper_model_root = os.path.join(models_root, "whisper-models",
tllm_model_name)
assert os.path.exists(
whisper_model_root
), f"{whisper_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
return (tllm_model_name, whisper_model_root)
@pytest.fixture(scope="function")
def whisper_example_audio_file(whisper_model_root):
return os.path.join(whisper_model_root[1], "1221-135766-0002.wav")
@pytest.fixture(scope="function")
def multimodal_model_root(request, llm_venv):
"Get multimodal model root"
models_root = os.path.join(llm_models_root(), 'multimodals')
assert models_root, "Did you set LLM_MODELS_ROOT?"
tllm_model_name = request.param
if 'VILA' in tllm_model_name:
models_root = os.path.join(llm_models_root(), 'vila')
if 'cogvlm-chat' in tllm_model_name:
models_root = os.path.join(llm_models_root(), 'cogvlm-chat')
if 'video-neva' in tllm_model_name:
models_root = os.path.join(llm_models_root(), 'video-neva')
tllm_model_name = tllm_model_name + ".nemo"
if 'neva-22b' in tllm_model_name:
models_root = os.path.join(llm_models_root(), 'neva')
tllm_model_name = tllm_model_name + ".nemo"
elif 'Llama-3.2' in tllm_model_name:
models_root = os.path.join(llm_models_root(), 'llama-3.2-models')
multimodal_model_root = os.path.join(models_root, tllm_model_name)
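    # Note: the slicing below strips the trailing name qualifier so the path points at the
    # shared checkpoint directory ("-video" is 6 characters, "-vision-trtllm" is 14 characters).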
if 'llava-onevision' in tllm_model_name and 'video' in tllm_model_name:
multimodal_model_root = multimodal_model_root[:-6]
elif 'llava-v1.6' in tllm_model_name and 'vision-trtllm' in tllm_model_name:
multimodal_model_root = multimodal_model_root[:-14]
assert os.path.exists(
multimodal_model_root
), f"{multimodal_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
yield (tllm_model_name, multimodal_model_root)
if 'llava-onevision' in tllm_model_name:
llm_venv.run_cmd(['-m', 'pip', 'uninstall', 'llava', '-y'])
@pytest.fixture(scope="function")
def update_transformers(llm_venv, llm_root):
yield
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(llm_root, "requirements.txt")
])
def remove_file(fn):
if os.path.isfile(fn) or os.path.islink(fn):
os.remove(fn)
@pytest.fixture(scope="module")
@cached_in_llm_models_root("replit-code-v1_5-3b", True)
def llm_replit_code_v1_5_3b_model_root():
"Get replit-code-v1_5-3b model root"
raise RuntimeError("replit-code-v1_5-3b must be cached")
@pytest.fixture(scope="module")
@cached_in_llm_models_root("gpt2", True)
def llm_gpt2_model_root():
"Get gpt2 model root"
raise RuntimeError("gpt2 must be cached")
@pytest.fixture(scope="module")
@cached_in_llm_models_root("gpt2-medium", True)
def llm_gpt2_medium_model_root():
"Get gpt2 medium model root"
raise RuntimeError("gpt2-medium must be cached")
@pytest.fixture(scope="module")
@cached_in_llm_models_root("GPT-2B-001_bf16_tp1.nemo", True)
def llm_gpt2_next_model_root():
"get gpt-2b-001_bf16_tp1.nemo"
raise RuntimeError("GPT-2B-001_bf16_tp1.nemo must be cached")
@pytest.fixture(scope="module")
@cached_in_llm_models_root("santacoder", True)
def llm_gpt2_santacoder_model_root():
"get santacoder data"
raise RuntimeError("santacoder must be cached")
@pytest.fixture(scope="module")
def llm_gpt2_starcoder_model_root(llm_venv, request):
"get starcoder-model"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
starcoder_model_root = os.path.join(models_root, "starcoder-model")
if hasattr(request, "param"):
if request.param == "starcoder":
starcoder_model_root = os.path.join(models_root, "starcoder-model")
elif request.param == "starcoderplus":
starcoder_model_root = os.path.join(models_root, "starcoderplus")
elif request.param == "starcoder2":
starcoder_model_root = os.path.join(models_root, "starcoder2-model")
return starcoder_model_root
@pytest.fixture(scope="module")
@cached_in_llm_models_root("starcoder2-3b", True)
def llm_gpt2_starcoder2_model_root():
"get starcoder2-3b"
raise RuntimeError("starcoder2-3b must be cached")
@pytest.fixture(scope="function")
def starcoder_model_root(request):
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
if request.param == "starcoder":
starcoder_model_root = os.path.join(models_root, "starcoder-model")
elif request.param == "starcoder2-15b":
starcoder_model_root = os.path.join(models_root, "starcoder2-model")
elif request.param == "starcoder2-3b":
starcoder_model_root = os.path.join(models_root, "starcoder2-3b")
elif request.param == "starcoderplus":
starcoder_model_root = os.path.join(models_root, "starcoderplus")
assert os.path.exists(
starcoder_model_root
), f"{starcoder_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
return starcoder_model_root
@pytest.fixture(scope="function")
def llm_gpt2b_lora_model_root(request):
"get gpt2b lora model"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
model_root_list = []
lora_root = os.path.join(models_root, "lora", "gpt-next-2b")
if hasattr(request, "param"):
if isinstance(request.param, tuple):
model_list = list(request.param)
else:
model_list = [request.param]
for item in model_list:
if item == "gpt2b_lora-900.nemo":
model_root_list.append(
os.path.join(lora_root, "gpt2b_lora-900.nemo"))
elif item == "gpt2b_lora-stories.nemo":
model_root_list.append(
os.path.join(lora_root, "gpt2b_lora-stories.nemo"))
return ",".join(model_root_list)
@pytest.fixture(scope="module")
def llama_tokenizer_model_root():
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
    # Use llama-7b-hf to load the tokenizer
    llama_tokenizer_model_root = os.path.join(models_root, "llama-models",
                                              "llama-7b-hf")
    return llama_tokenizer_model_root
@pytest.fixture(scope="module")
def llama_v2_tokenizer_model_root():
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
llama_v2_tokenizer_model_root = os.path.join(models_root, "llama-models-v2")
assert os.path.exists(
llama_v2_tokenizer_model_root
), f"{llama_v2_tokenizer_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
return llama_v2_tokenizer_model_root
@pytest.fixture(scope="function")
def llama_model_root(request):
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
if request.param == "llama-7b":
llama_model_root = os.path.join(models_root, "llama-models",
"llama-7b-hf")
elif request.param == "llama-30b":
llama_model_root = os.path.join(models_root, "llama-models",
"llama-30b-hf")
elif request.param == "TinyLlama-1.1B-Chat-v1.0":
llama_model_root = os.path.join(models_root, "llama-models-v2",
"TinyLlama-1.1B-Chat-v1.0")
elif request.param == "llama-v2-7b":
llama_model_root = os.path.join(models_root, "llama-models-v2", "7B")
elif request.param == "llama-v2-70b":
llama_model_root = os.path.join(models_root, "llama-models-v2", "70B")
elif request.param == "llama-v2-70b-hf":
llama_model_root = os.path.join(models_root, "llama-models-v2",
"llama-v2-70b-hf")
elif request.param == "Llama-2-7B-AWQ":
llama_model_root = os.path.join(models_root, "llama-models-v2",
"Llama-2-7B-AWQ")
elif request.param == "Llama-2-7B-GPTQ":
llama_model_root = os.path.join(models_root, "llama-models-v2",
"Llama-2-7B-GPTQ")
elif request.param == "llama-v2-13b-hf":
llama_model_root = os.path.join(models_root, "llama-models-v2",
"llama-v2-13b-hf")
elif request.param == "llama-v2-7b-hf":
llama_model_root = os.path.join(models_root, "llama-models-v2",
"llama-v2-7b-hf")
elif request.param == "llama-v2-70b-hf":
llama_model_root = os.path.join(models_root, "llama-models-v2",
"llama-v2-70b-hf")
elif request.param == "llama-v3-8b-hf":
llama_model_root = os.path.join(models_root, "llama-models-v3", "8B")
elif request.param == "llama-v3-8b-instruct-hf":
llama_model_root = os.path.join(models_root, "llama-models-v3",
"llama-v3-8b-instruct-hf")
elif request.param == "Llama-3-8B-Instruct-Gradient-1048k":
llama_model_root = os.path.join(models_root, "llama-models-v3",
"Llama-3-8B-Instruct-Gradient-1048k")
elif request.param == "Llama-3-70B-Instruct-Gradient-1048k":
llama_model_root = os.path.join(models_root, "llama-models-v3",
"Llama-3-70B-Instruct-Gradient-1048k")
elif request.param == "llama-3.1-405b":
llama_model_root = os.path.join(models_root, "llama-3.1-model",
"Meta-Llama-3.1-405B")
elif request.param == "llama-3.1-405b-fp8":
llama_model_root = os.path.join(models_root, "llama-3.1-model",
"Meta-Llama-3.1-405B-FP8")
elif request.param == "llama-3.1-70b":
llama_model_root = os.path.join(models_root, "llama-3.1-model",
"Meta-Llama-3.1-70B")
elif request.param == "llama-3.1-8b":
llama_model_root = os.path.join(models_root, "llama-3.1-model",
"Meta-Llama-3.1-8B")
elif request.param == "llama-3.1-8b-instruct-hf-fp8":
llama_model_root = os.path.join(models_root, "llama-3.1-model",
"Llama-3.1-8B-Instruct-FP8")
elif request.param == "llama-3.1-8b-hf-nvfp4":
llama_model_root = os.path.join(models_root, "nvfp4-quantized",
"Meta-Llama-3.1-8B")
elif request.param == "llama-3.1-70b-instruct":
llama_model_root = os.path.join(models_root, "llama-3.1-model",
"Meta-Llama-3.1-70B-Instruct")
elif request.param == "llama-3.2-1b":
llama_model_root = os.path.join(models_root, "llama-3.2-models",
"Llama-3.2-1B")
elif request.param == "llama-3.2-3b":
llama_model_root = os.path.join(models_root, "llama-3.2-models",
"Llama-3.2-3B")
assert os.path.exists(
llama_model_root
), f"{llama_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
return llama_model_root
@pytest.fixture(scope="function")
def code_llama_model_root(request):
"get CodeLlama model data"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
if request.param == "CodeLlama-7b-Instruct":
codellama_model_root = os.path.join(models_root, "codellama",
"CodeLlama-7b-Instruct-hf")
elif request.param == "CodeLlama-13b-Instruct":
codellama_model_root = os.path.join(models_root, "codellama",
"CodeLlama-13b-Instruct-hf")
elif request.param == "CodeLlama-34b-Instruct":
codellama_model_root = os.path.join(models_root, "codellama",
"CodeLlama-34b-Instruct-hf")
elif request.param == "CodeLlama-70b-hf":
codellama_model_root = os.path.join(models_root, "codellama",
"CodeLlama-70b-hf")
return codellama_model_root
@pytest.fixture(scope="function")
def draft_target_model_roots(request):
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
draft_model_root = None
target_model_root = None
if request.param == "gpt2":
draft_model_root = os.path.join(models_root, "gpt2-medium")
target_model_root = os.path.join(models_root, "gpt2-medium")
elif request.param == "llama_v2":
draft_model_root = os.path.join(models_root,
"llama-models-v2/llama-v2-7b-hf")
target_model_root = os.path.join(models_root,
"llama-models-v2/llama-v2-13b-hf")
assert os.path.exists(
draft_model_root
), f"Draft-Target-Model draft model path {draft_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
assert os.path.exists(
target_model_root
), f"Draft-Target-Model target model path {target_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
return draft_model_root, target_model_root
@pytest.fixture(scope="function")
def prompt_lookup_root(request):
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
if request.param == "gpt2":
models_root = os.path.join(models_root, "gpt2-medium")
elif request.param == "llama_v2":
models_root = os.path.join(models_root,
"llama-models-v2/llama-v2-13b-hf")
assert os.path.exists(
models_root
), f"Prompt-Lookup model path {models_root} does not exist under NFS LLM_MODELS_ROOT dir"
return models_root
@pytest.fixture(scope="function")
def medusa_model_roots(request):
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
base_model_root_for_medusa = None
medusa_heads_model_root = None
if request.param == "medusa-vicuna-7b-v1.3":
base_model_root_for_medusa = os.path.join(models_root, "vicuna-7b-v1.3")
medusa_heads_model_root = os.path.join(models_root,
"medusa-vicuna-7b-v1.3")
elif request.param == "llama3.1-medusa-8b-hf_v0.1":
base_model_root_for_medusa = os.path.join(models_root,
"llama3.1-medusa-8b-hf_v0.1")
medusa_heads_model_root = base_model_root_for_medusa
assert os.path.exists(
base_model_root_for_medusa
), f"Medusa base model path {base_model_root_for_medusa} does not exist under NFS LLM_MODELS_ROOT dir"
assert os.path.exists(
medusa_heads_model_root
), f"Medusa heads model path {medusa_heads_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
return base_model_root_for_medusa, medusa_heads_model_root
@pytest.fixture(scope="function")
def lookahead_model_roots(request):
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
base_model_root_for_lookahead = None
if request.param == "vicuna-7b-v1.3":
base_model_root_for_lookahead = os.path.join(models_root,
"vicuna-7b-v1.3")
assert os.path.exists(
base_model_root_for_lookahead
), f"Lookahead base model path {base_model_root_for_lookahead} does not exist under NFS LLM_MODELS_ROOT dir"
return base_model_root_for_lookahead
@pytest.fixture(scope="function")
def redrafter_model_roots(request):
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
base_model_root_for_redrafter = None
redrafter_drafting_model_root = None
if request.param == "redrafter-vicuna-7b-v1.3":
base_model_root_for_redrafter = os.path.join(models_root,
"vicuna-7b-v1.3")
redrafter_drafting_model_root = os.path.join(
models_root, "redrafter-vicuna-7b-v1.3")
assert os.path.exists(
base_model_root_for_redrafter
), f"ReDrafter base model path {base_model_root_for_redrafter} does not exist under NFS LLM_MODELS_ROOT dir"
assert os.path.exists(
redrafter_drafting_model_root
), f"ReDrafter heads model path {redrafter_drafting_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
return base_model_root_for_redrafter, redrafter_drafting_model_root
@pytest.fixture(scope="function")
def eagle_model_roots(request):
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
base_model_root_for_eagle = None
eagle_heads_model_root = None
if request.param == "EAGLE-Vicuna-7B-v1.3":
# Test the checkpoint released from HF, which requires two separate weights,
# one for the base model and one for the EagleNets.
base_model_root_for_eagle = os.path.join(models_root, "vicuna-7b-v1.3")
eagle_heads_model_root = os.path.join(models_root,
"EAGLE-Vicuna-7B-v1.3")
assert os.path.exists(
base_model_root_for_eagle
), f"EAGLE base model path {base_model_root_for_eagle} does not exist under NFS LLM_MODELS_ROOT dir"
assert os.path.exists(
eagle_heads_model_root
), f"EAGLE heads model path {eagle_heads_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
return base_model_root_for_eagle, eagle_heads_model_root
elif request.param == "llama3.1-eagle-8b-hf_v0.5":
        # Test the checkpoint released from ModelOpt, which only requires a single checkpoint
        # containing both the base model and the EagleNets, stored as FP8.
modelopt_checkpoint_root_for_eagle = os.path.join(
models_root, "modelopt-hf-model-hub", "llama3.1-eagle-8b-hf_v0.5")
assert os.path.exists(
modelopt_checkpoint_root_for_eagle
), f"EAGLE ModelOpt checkpoint path {modelopt_checkpoint_root_for_eagle} does not exist under NFS LLM_MODELS_ROOT dir"
return modelopt_checkpoint_root_for_eagle
    else:
        assert False, f"Unknown EAGLE weight name: {request.param}"
@pytest.fixture(scope="function")
def mamba_model_root(request):
"get mamba model data"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
mamba_model_root = os.path.join(models_root, 'mamba', "mamba-130m-hf")
if hasattr(request, "param"):
if request.param == "mamba-2.8b":
mamba_model_root = os.path.join(models_root, 'mamba',
"mamba-2.8b-hf")
elif request.param == "mamba-130m":
mamba_model_root = os.path.join(models_root, 'mamba',
"mamba-130m-hf")
elif request.param == "mamba-1.4b":
mamba_model_root = os.path.join(models_root, 'mamba',
"mamba-1.4b-hf")
elif request.param == "mamba-790m":
mamba_model_root = os.path.join(models_root, 'mamba',
"mamba-790m-hf")
elif request.param == "mamba-370m":
mamba_model_root = os.path.join(models_root, 'mamba',
"mamba-370m-hf")
elif request.param == "mamba2-2.7b":
mamba_model_root = os.path.join(models_root, 'mamba2',
"mamba2-2.7b")
elif request.param == "mamba2-1.3b":
mamba_model_root = os.path.join(models_root, 'mamba2',
"mamba2-1.3b")
elif request.param == "mamba2-780m":
mamba_model_root = os.path.join(models_root, 'mamba2',
"mamba2-780m")
elif request.param == "mamba2-370m":
mamba_model_root = os.path.join(models_root, 'mamba2',
"mamba2-370m")
elif request.param == "mamba2-130m":
mamba_model_root = os.path.join(models_root, 'mamba2',
"mamba2-130m")
elif request.param == "mamba-codestral-7B-v0.1":
mamba_model_root = os.path.join(models_root, 'mamba2',
"mamba-codestral-7B-v0.1")
assert exists(mamba_model_root), f"{mamba_model_root} does not exist!"
return mamba_model_root
@pytest.fixture(scope="function")
def recurrentgemma_model_root(request):
"get recurrentgemma model data"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
assert hasattr(request, "param"), "Param is missing!"
if request.param == "recurrentgemma-2b":
recurrentgemma_model_root = os.path.join(models_root, "recurrentgemma",
"recurrentgemma-2b")
elif request.param == "recurrentgemma-2b-it":
recurrentgemma_model_root = os.path.join(models_root, "recurrentgemma",
"recurrentgemma-2b-it")
elif request.param == "recurrentgemma-2b-flax":
recurrentgemma_model_root = os.path.join(models_root, "recurrentgemma",
"recurrentgemma-2b-flax", "2b")
elif request.param == "recurrentgemma-2b-it-flax":
recurrentgemma_model_root = os.path.join(models_root, "recurrentgemma",
"recurrentgemma-2b-it-flax",
"2b-it")
assert exists(recurrentgemma_model_root
), f"{recurrentgemma_model_root} does not exist!"
return recurrentgemma_model_root
@pytest.fixture(scope="function")
def nemotron_nas_model_root(request):
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
assert hasattr(request, "param"), "Param is missing!"
nemotron_nas_model_root = os.path.join(models_root, "nemotron-nas",
request.param)
assert exists(
nemotron_nas_model_root), f"{nemotron_nas_model_root} doesn't exist!"
return nemotron_nas_model_root
@pytest.fixture(scope="function")
def llm_lora_model_root(request):
"get lora model path"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
assert hasattr(request, "param"), "Param is missing!"
model_list = []
model_root_list = []
if isinstance(request.param, tuple):
model_list = list(request.param)
else:
model_list = [request.param]
for item in model_list:
if item == "chinese-llama-2-lora-13b":
model_root_list.append(
os.path.join(models_root, "llama-models-v2",
"chinese-llama-2-lora-13b"))
elif item == "Japanese-Alpaca-LoRA-7b-v0":
model_root_list.append(
os.path.join(models_root, "llama-models",
"Japanese-Alpaca-LoRA-7b-v0"))
elif item == "luotuo-lora-7b-0.1":
model_root_list.append(
os.path.join(models_root, "llama-models", "luotuo-lora-7b-0.1"))
elif item == "Ko-QWEN-7B-Chat-LoRA":
model_root_list.append(
os.path.join(models_root, "Ko-QWEN-7B-Chat-LoRA"))
elif item == "Qwen1.5-7B-Chat-750Mb-lora":
model_root_list.append(
os.path.join(models_root, "Qwen1.5-7B-Chat-750Mb-lora"))
elif item == "Upcycled-Qwen1.5-MoE2.7B-LoRA":
model_root_list.append(
os.path.join(models_root, "Upcycled-Qwen1.5-MoE2.7B-LoRA"))
elif item == "Phi-3-mini-4k-instruct-ru-lora":
model_root_list.append(
os.path.join(models_root, "lora", "phi",
"Phi-3-mini-4k-instruct-ru-lora"))
elif item == "peft-lora-starcoder2-15b-unity-copilot":
model_root_list.append(
os.path.join(models_root, "lora", "starcoder",
"peft-lora-starcoder2-15b-unity-copilot"))
elif item == "chinese-mixtral-lora":
model_root_list.append(
os.path.join(models_root, "chinese-mixtral-lora"))
elif item == "komt-mistral-7b-v1-lora":
model_root_list.append(
os.path.join(models_root, "komt-mistral-7b-v1-lora"))
return ",".join(model_root_list)
@pytest.fixture(scope="function")
def llm_dora_model_root(request):
"get dora model path"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
assert hasattr(request, "param"), "Param is missing!"
model_list = []
model_root_list = []
if isinstance(request.param, tuple):
model_list = list(request.param)
else:
model_list = [request.param]
for item in model_list:
if item == "commonsense-llama-v3-8b-dora-r32":
model_root_list.append(
os.path.join(models_root, "llama-models-v3", "DoRA-weights",
"llama_dora_commonsense_checkpoints", "LLama3-8B",
"dora_r32"))
return ",".join(model_root_list)
@pytest.fixture(scope="function")
def llm_mistral_model_root(request):
"get mistral model path"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
model_root = os.path.join(models_root, "mistral-7b-v0.1")
if request.param == "mistral-7b-v0.1":
model_root = os.path.join(models_root, "mistral-7b-v0.1")
if request.param == "komt-mistral-7b-v1":
model_root = os.path.join(models_root, "komt-mistral-7b-v1")
if request.param == "mistral-7b-v0.3":
model_root = os.path.join(models_root, "Mistral-7B-Instruct-v0.3")
return model_root
@pytest.fixture(scope="function")
def llm_mixtral_model_root(request):
"get mixtral model path"
models_root = llm_models_root()
model_root = os.path.join(models_root, "Mixtral-8x7B-v0.1")
assert models_root, "Did you set LLM_MODELS_ROOT?"
if request.param == "Mixtral-8x7B-v0.1":
model_root = os.path.join(models_root, "Mixtral-8x7B-v0.1")
if request.param == "Mixtral-8x22B-v0.1":
model_root = os.path.join(models_root, "Mixtral-8x22B-v0.1")
if request.param == "Mixtral-8x7B-Instruct-v0.1":
model_root = os.path.join(models_root, "Mixtral-8x7B-Instruct-v0.1")
return model_root
@pytest.fixture(scope="module")
@cached_in_llm_models_root("mathstral-7B-v0.1", True)
def llm_mathstral_model_root(llm_venv):
"return mathstral-7B-v0.1 model root"
    workspace = llm_venv.get_working_directory()
    mathstral_model_root = os.path.join(workspace, "mathstral-7B-v0.1")
    return mathstral_model_root
@pytest.fixture(scope="module")
@cached_in_llm_models_root("LongAlpaca-7B", True)
def llm_long_alpaca_model_root(llm_venv):
"return long alpaca model root"
workspace = llm_venv.get_working_directory()
long_alpaca_model_root = os.path.join(workspace, "LongAlpaca-7B")
return long_alpaca_model_root
@pytest.fixture(scope="module")
@cached_in_llm_models_root("gpt-neox-20b", True)
def llm_gptneox_model_root(llm_venv):
"return gptneox model root"
workspace = llm_venv.get_working_directory()
gptneox_model_root = os.path.join(workspace, "gpt-neox-20b")
return gptneox_model_root
@pytest.fixture(scope="function")
def llm_phi_model_root(request):
"return phi model root"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
if 'Phi-3.5' in request.param:
phi_model_root = os.path.join(models_root, 'Phi-3.5/' + request.param)
elif 'Phi-3' in request.param:
phi_model_root = os.path.join(models_root, 'Phi-3/' + request.param)
else:
phi_model_root = os.path.join(models_root, request.param)
assert os.path.exists(
phi_model_root
), f"{phi_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
return phi_model_root
@pytest.fixture(scope="module")
@cached_in_llm_models_root("falcon-180b", True)
def llm_falcon_180b_model_root():
"prepare falcon 180b model & return falcon model root"
raise RuntimeError("falcon 180b must be cached")
@pytest.fixture(scope="module")
@cached_in_llm_models_root("falcon-11B", True)
def llm_falcon_11b_model_root(llm_venv):
"prepare falcon-11B model & return falcon model root"
workspace = llm_venv.get_working_directory()
model_root = os.path.join(workspace, "falcon-11B")
call(f"git clone https://huggingface.co/tiiuae/falcon-11B {model_root}",
shell=True)
return model_root
@pytest.fixture(scope="module")
@cached_in_llm_models_root("email_composition", True)
def llm_gpt2_next_8b_model_root():
raise RuntimeError("gpt-next 8b must be cached")
@pytest.fixture(scope="function")
def llm_glm_4_9b_model_root(request):
"prepare glm-4-9b model & return model path"
model_name = request.param
models_root = llm_models_root()
if model_name == "glm-4-9b":
model_root = os.path.join(models_root, "glm-4-9b")
elif model_name == "glm-4-9b-chat":
model_root = os.path.join(models_root, "glm-4-9b-chat")
elif model_name == "glm-4-9b-chat-1m":
model_root = os.path.join(models_root, "glm-4-9b-chat-1m")
elif model_name == "glm-4v-9b":
model_root = os.path.join(models_root, "glm-4v-9b")
return model_root
@pytest.fixture(scope="module")
@cached_in_llm_models_root("internlm-chat-7b", True)
def llm_internlm_7b_model_root(llm_venv):
"prepare internlm 7b model"
workspace = llm_venv.get_working_directory()
model_root = os.path.join(workspace, "internlm-chat-7b")
call(
f"git clone https://huggingface.co/internlm/internlm-chat-7b {model_root}",
shell=True)
return model_root
@pytest.fixture(scope="module")
@cached_in_llm_models_root("internlm2-7b", True)
def llm_internlm2_7b_model_root(llm_venv):
"prepare internlm2 7b model"
workspace = llm_venv.get_working_directory()
model_root = os.path.join(workspace, "internlm2-7b")
call(f"git clone https://huggingface.co/internlm/internlm2-7b {model_root}",
shell=True)
return model_root
@pytest.fixture(scope="module")
@cached_in_llm_models_root("internlm-chat-20b", True)
def llm_internlm_20b_model_root(llm_venv):
"prepare internlm 20b model"
workspace = llm_venv.get_working_directory()
model_root = os.path.join(workspace, "internlm-chat-20b")
call(
f"git clone https://huggingface.co/internlm/internlm-chat-20b {model_root}",
shell=True)
return model_root
@pytest.fixture(scope="module")
@cached_in_llm_models_root("Qwen-7B-Chat", True)
def llm_qwen_7b_model_root(llm_venv):
"prepare qwen-7b model & return model path"
workspace = llm_venv.get_working_directory()
model_root = os.path.join(workspace, "Qwen-7B-Chat")
return model_root
@pytest.fixture(scope="function")
def llm_qwen_model_root(request, llm_venv):
"prepare qwen model & return model path"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
qwen_model_root = os.path.join(models_root, "Qwen-7B-Chat")
if hasattr(request, "param"):
if request.param == "qwen_7b_chat":
qwen_model_root = os.path.join(models_root, "Qwen-7B-Chat")
elif request.param == "qwen_14b_chat":
qwen_model_root = os.path.join(models_root, "Qwen-14B-Chat")
elif request.param == "qwen_72b_chat":
qwen_model_root = os.path.join(models_root, "Qwen-72B-Chat")
elif request.param == "qwen_7b_chat_int4":
qwen_model_root = os.path.join(models_root, "Qwen-7B-Chat-Int4")
elif request.param == "qwen-vl-chat":
qwen_model_root = os.path.join(models_root, "Qwen-VL-Chat")
elif request.param == "qwen1.5_7b_chat_awq":
qwen_model_root = os.path.join(models_root, "Qwen1.5-7B-Chat-AWQ")
elif request.param == "qwen1.5_0.5b_chat":
qwen_model_root = os.path.join(models_root, "Qwen1.5-0.5B-Chat")
elif request.param == "qwen1.5_7b_chat":
qwen_model_root = os.path.join(models_root, "Qwen1.5-7B-Chat")
elif request.param == "qwen1.5_14b_chat":
qwen_model_root = os.path.join(models_root, "Qwen1.5-14B-Chat")
elif request.param == "qwen1.5_moe_a2.7b_chat":
qwen_model_root = os.path.join(models_root,
"Qwen1.5-MoE-A2.7B-Chat")
elif request.param == "qwen1.5_72b_chat":
qwen_model_root = os.path.join(models_root, "Qwen1.5-72B-Chat")
elif request.param == "qwen1.5_moe_a2.7b_chat":
qwen_model_root = os.path.join(models_root,
"Qwen1.5-MoE-A2.7B-Chat")
elif request.param == "qwen1.5_14b_chat_int4":
qwen_model_root = os.path.join(models_root,
"Qwen1.5-14B-Chat-GPTQ-Int4")
elif request.param == "qwen2_0.5b_instruct":
qwen_model_root = os.path.join(models_root, "Qwen2-0.5B-Instruct")
elif request.param == "qwen2_7b_instruct":
qwen_model_root = os.path.join(models_root, "Qwen2-7B-Instruct")
elif request.param == "qwen2_7b_awq":
qwen_model_root = os.path.join(models_root, "Qwen2-7B-Instruct-AWQ")
elif request.param == "qwen2_57b_a14b":
qwen_model_root = os.path.join(models_root, "Qwen2-57B-A14B")
elif request.param == "qwen2_72b_instruct":
qwen_model_root = os.path.join(models_root, "Qwen2-72B-Instruct")
elif request.param == "qwen2_vl_7b_instruct":
qwen_model_root = os.path.join(models_root, "Qwen2-VL-7B-Instruct")
elif request.param == "qwen2_audio_7b_instruct":
qwen_model_root = os.path.join(models_root,
"Qwen2-Audio-7B-Instruct")
elif request.param == "qwen2.5_0.5b_instruct":
qwen_model_root = os.path.join(models_root, "Qwen2.5-0.5B-Instruct")
elif request.param == "qwen2.5_1.5b_instruct":
qwen_model_root = os.path.join(models_root, "Qwen2.5-1.5B-Instruct")
elif request.param == "qwen2.5_7b_instruct":
qwen_model_root = os.path.join(models_root, "Qwen2.5-7B-Instruct")
elif request.param == "qwen2.5_14b_instruct_int4":
qwen_model_root = os.path.join(models_root,
"Qwen2.5-14B-Instruct-GPTQ-Int4")
elif request.param == "qwen2.5_72b_instruct":
qwen_model_root = os.path.join(models_root, "Qwen2.5-72B-Instruct")
assert exists(qwen_model_root), f"{qwen_model_root} does not exist!"
return qwen_model_root
@pytest.fixture(scope="function")
def llm_granite_model_root(request):
models_root = llm_models_root()
model_name = request.param
granite_model_root = os.path.join(models_root, model_name)
assert exists(granite_model_root), f"{granite_model_root} does not exist!"
return granite_model_root
@pytest.fixture(scope="session")
@cached_in_llm_models_root("nemotron/Nemotron-3-8B-Base-4k.nemo", True)
def llm_nemotron_3_8b_model_root():
"get nemotron/Nemotron-3-8B-Base-4k.nemo"
raise RuntimeError("nemotron/Nemotron-3-8B-Base-4k.nemo must be cached")
@pytest.fixture(scope="session")
@cached_in_llm_models_root("nemotron/Nemotron-4-15B-Base.nemo", True)
def llm_nemotron_4_15b_model_root():
"get nemotron/Nemotron-4-15B-Base.nemo"
raise RuntimeError("nemotron/Nemotron-4-15B-Base.nemo must be cached")
@pytest.fixture(scope="session")
def mmlu_dataset_root():
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
mmlu_dataset_root = os.path.join(models_root, "datasets", "mmlu")
assert os.path.exists(
mmlu_dataset_root
), f"{mmlu_dataset_root} does not exist under NFS LLM_MODELS_ROOT dir"
return mmlu_dataset_root
@pytest.fixture(scope="function")
def deepseek_model_root(request):
"get deepseek model"
models_root = llm_models_root()
assert models_root, "Did you set LLM_MODELS_ROOT?"
if request.param == "deepseek-coder-6.7b-instruct":
model_root = os.path.join(models_root, "deepseek-coder-6.7b-instruct")
return model_root
@pytest.fixture(scope="module")
def llm_commandr_v01_model_root(llm_venv):
"prepare command-r model & return model path"
models_root = llm_models_root()
model_root = os.path.join(models_root, "c4ai-command-r-v01")
return model_root
@pytest.fixture(scope="module")
def llm_commandr_plus_model_root(llm_venv):
"prepare command-r-plus model & return model path"
models_root = llm_models_root()
model_root = os.path.join(models_root, "c4ai-command-r-plus")
return model_root
@pytest.fixture(scope="module")
def llm_aya_23_8b_model_root(llm_venv):
"prepare Aya-23-8B model & return model path"
models_root = llm_models_root()
model_root = os.path.join(models_root, "aya-23-8B")
return model_root
@pytest.fixture(scope="module")
def llm_aya_23_35b_model_root(llm_venv):
"prepare Aya-23-35B model & return model path"
models_root = llm_models_root()
model_root = os.path.join(models_root, "aya-23-35B")
return model_root
def evaltool_mmlu_post_process(results_path, baseline, threshold):
# Note: In the older version of the lm-harness result file,
# there are 57 values.
# The latest version of lm-harness includes
# 4 additional categories and 1 whole dataset in the result file.
# We need to exclude these new categories and
# the whole dataset when calculating the average.
with open(results_path) as f:
result = json.load(f)
acc_acc = 0.0
tasks_to_ignore = [
"mmlu_str", "mmlu_str_stem", "mmlu_str_other",
"mmlu_str_social_sciences", "mmlu_str_humanities"
]
total_task = len(result['results']) - len(tasks_to_ignore)
assert total_task == 57
for sub_task in result['results']:
if sub_task in tasks_to_ignore:
continue
acc_acc += float(result['results'][sub_task]['exact_match,none'])
avg_acc = acc_acc / total_task
print("MMLU avg accuracy:", avg_acc)
assert abs(avg_acc - baseline) <= threshold
def evaltool_wikilingua_post_process(results_path, baseline, threshold):
with open(results_path) as f:
result = json.load(f)
rouge_l = result['results']['wikilingua_english']['rougeL,none']
print("Wikilingua_english rouge_L:", rouge_l)
assert abs(rouge_l - baseline) <= threshold
def evaltool_humaneval_post_process(results_path, baseline, threshold):
with open(results_path) as f:
result = json.load(f)
print(result)
acc = result[0]['humaneval']['pass@1']
assert abs(acc - baseline) <= threshold
def evaltool_mtbench_post_process(results_path, baseline, threshold):
    with open(results_path) as f:
        get_result = False
        for line in f:
            if line.startswith('total'):
                get_result = True
                total_score = float(line.split(',')[1].strip())
                assert abs(total_score - baseline) <= threshold
        assert get_result
@pytest.fixture(scope="module")
def evaltool_root(llm_venv):
if GITLAB_API_USER is None or GITLAB_API_TOKEN is None or EVALTOOL_REPO_URL is None:
pytest.skip(
"Need to set GITLAB_API_USER, GITLAB_API_TOKEN, and EVALTOOL_REPO_URL env vars to run evaltool tests."
)
workspace = llm_venv.get_working_directory()
clone_dir = os.path.join(workspace, "eval-tool")
repo_url = f"https://{GITLAB_API_USER}:{GITLAB_API_TOKEN}@{EVALTOOL_REPO_URL}"
branch_name = "dev/0.9"
from evaltool.constants import EVALTOOL_SETUP_SCRIPT
evaltool_setup_cmd = [
EVALTOOL_SETUP_SCRIPT, "-b", branch_name, "-d", clone_dir, "-r",
repo_url
]
call(" ".join(evaltool_setup_cmd), shell=True)
return clone_dir
@pytest.fixture(scope="function")
def engine_dir(llm_venv, capfd):
"Get engine dir"
engine_path = os.path.join(llm_venv.get_working_directory(), "engines")
print_storage_usage(llm_venv.get_working_directory(),
"before removing existing engines", capfd)
# clean the engine dir for each case.
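    # The removal is retried for up to ~60 seconds because a single rmtree pass may not
    # fully delete the directory (ignore_errors=True swallows transient failures).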
cur_time = time.time()
expire = time.time() + 60
while exists(engine_path) and cur_time < expire:
shutil.rmtree(engine_path, ignore_errors=True)
time.sleep(2)
cur_time = time.time()
print_storage_usage(llm_venv.get_working_directory(),
"after removing existing engines", capfd)
return engine_path
@pytest.fixture(scope="function")
def cmodel_dir(llm_venv):
"converted model dir"
model_dir = os.path.join(llm_venv.get_working_directory(), "cmodels")
yield model_dir
if exists(model_dir):
shutil.rmtree(model_dir)
@pytest.fixture(scope="module")
def qcache_dir(llm_venv, llm_root):
"get quantization cache dir"
defs.ci_profiler.start("qcache_dir")
cache_dir = os.path.join(llm_venv.get_working_directory(), "qcache")
quantization_root = os.path.join(llm_root, "examples", "quantization")
import platform
    # requirements.txt is not available on aarch64, so skip installing it there.
if "aarch64" not in platform.machine() and get_sm_version() >= 89:
llm_venv.run_cmd([
"-m", "pip", "install", "-r",
os.path.join(quantization_root, "requirements.txt")
])
if not exists(cache_dir):
makedirs(cache_dir)
yield cache_dir
if exists(cache_dir):
shutil.rmtree(cache_dir)
defs.ci_profiler.stop("qcache_dir")
print(
f"qcache_dir: {defs.ci_profiler.elapsed_time_in_sec('qcache_dir')} sec")
@pytest.fixture(scope="module")
def qcache_dir_without_install_package(llm_venv, llm_root):
"get quantization cache dir"
defs.ci_profiler.start("qcache_dir_without_install_package")
cache_dir = os.path.join(llm_venv.get_working_directory(), "qcache")
if not exists(cache_dir):
makedirs(cache_dir)
yield cache_dir
if exists(cache_dir):
shutil.rmtree(cache_dir)
defs.ci_profiler.stop("qcache_dir_without_install_package")
print(
f"qcache_dir_without_install_package: {defs.ci_profiler.elapsed_time_in_sec('qcache_dir_without_install_package')} sec"
)
@pytest.fixture(scope="module")
def star_attention_input_root(llm_root):
"Get star attention input file dir"
star_attention_input_root = unittest_path() / "_torch" / "multi_gpu"
return star_attention_input_root
@pytest.fixture(autouse=True)
def skip_by_device_count(request):
"fixture for skip less device count"
if request.node.get_closest_marker('skip_less_device'):
device_count = get_device_count()
expected_count = request.node.get_closest_marker(
'skip_less_device').args[0]
if expected_count > int(device_count):
pytest.skip(
f'Device count {device_count} is less than {expected_count}')
@pytest.fixture(autouse=True)
def skip_by_device_memory(request):
"fixture for skip less device memory"
if request.node.get_closest_marker('skip_less_device_memory'):
device_memory = get_device_memory()
expected_memory = request.node.get_closest_marker(
'skip_less_device_memory').args[0]
if expected_memory > int(device_memory):
pytest.skip(
f'Device memory {device_memory} is less than {expected_memory}')
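# Hypothetical usage of the two autouse fixtures above (test name is made up): tests opt in via
# markers whose single argument is the minimum device count or the minimum per-GPU memory in MiB,
# e.g.
#
#   @pytest.mark.skip_less_device(2)
#   @pytest.mark.skip_less_device_memory(40000)
#   def test_needs_two_large_gpus(llm_venv):
#       ...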
def get_sm_version():
"get compute capability"
with tempfile.TemporaryDirectory() as temp_dirname:
suffix = ".exe" if is_windows() else ""
# TODO: Use NRSU because we can't assume nvidia-smi across all platforms.
cmd = " ".join([
"nvidia-smi" + suffix, "--query-gpu=compute_cap",
"--format=csv,noheader"
])
output = check_output(cmd, shell=True, cwd=temp_dirname)
compute_cap = output.strip().split("\n")[0]
sm_major, sm_minor = list(map(int, compute_cap.split(".")))
return sm_major * 10 + sm_minor
skip_pre_ada = pytest.mark.skipif(
get_sm_version() < 89,
reason="This test is not supported in pre-Ada architecture")
skip_pre_hopper = pytest.mark.skipif(
get_sm_version() < 90,
reason="This test is not supported in pre-Hopper architecture")
skip_pre_blackwell = pytest.mark.skipif(
get_sm_version() < 100,
reason="This test is not supported in pre-Blackwell architecture")
skip_post_blackwell = pytest.mark.skipif(
get_sm_version() >= 100,
reason="This test is not supported in post-Blackwell architecture")
skip_no_nvls = pytest.mark.skipif(not ipc_nvls_supported(),
reason="NVLS is not supported")
def skip_fp8_pre_ada(use_fp8):
"skip fp8 tests if sm version less than 8.9"
if use_fp8 and get_sm_version() < 89:
pytest.skip("FP8 is not supported on pre-Ada architectures")
def skip_fp4_pre_blackwell(use_fp4):
"skip fp4 tests if sm version less than 10.0"
if use_fp4 and get_sm_version() < 100:
pytest.skip("FP4 is not supported on pre-Blackwell architectures")
@pytest.fixture(autouse=True)
def skip_device_not_contain(request):
"skip test if device not contain keyword"
if request.node.get_closest_marker('skip_device_not_contain'):
keyword_list = request.node.get_closest_marker(
'skip_device_not_contain').args[0]
device = get_gpu_device_list()[0]
if not any(keyword in device for keyword in keyword_list):
pytest.skip(
f"Device {device} does not contain keyword in {keyword_list}.")
def get_gpu_device_list():
"get device list"
with tempfile.TemporaryDirectory() as temp_dirname:
suffix = ".exe" if is_windows() else ""
# TODO: Use NRSU because we can't assume nvidia-smi across all platforms.
cmd = " ".join(["nvidia-smi" + suffix, "-L"])
output = check_output(cmd, shell=True, cwd=temp_dirname)
return [l.strip() for l in output.strip().split("\n")]
def get_device_count():
"return device count"
return len(get_gpu_device_list())
def get_device_memory():
"get gpu memory"
memory = 0
with tempfile.TemporaryDirectory() as temp_dirname:
suffix = ".exe" if is_windows() else ""
# TODO: Use NRSU because we can't assume nvidia-smi across all platforms.
cmd = " ".join([
"nvidia-smi" + suffix, "--query-gpu=memory.total",
"--format=csv,noheader"
])
output = check_output(cmd, shell=True, cwd=temp_dirname)
memory = int(output.strip().split()[0])
return memory
#
# When a test parameter has an empty id, older versions of pytest omitted that parameter entirely
# when generating the test node's ID. This was actually a bug rather than expected behavior, and it
# was fixed in newer versions of pytest: https://github.com/pytest-dev/pytest/pull/6607. TRT test
# defs, however, rely on the old behavior for quite a few test names. This is a hacky WAR that
# restores the old behavior so that those test names do not change. Note: this might break in a
# future pytest version.
#
# TODO: Remove this hack once the test names are fixed.
#
from _pytest.python import CallSpec2
CallSpec2.id = property(
lambda self: "-".join(map(str, filter(None, self._idlist))))
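# Illustrative effect of the WAR above (parameter ids are made up): with parametrize ids
# ["", "fp16"], recent stock pytest would generate a node ID like "test_foo[-fp16]", whereas the
# property override filters out the empty id and preserves the historical "test_foo[fp16]" name.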
def pytest_addoption(parser):
parser.addoption(
"--test-list",
"-F",
action="store",
default=None,
help="Path to the file containing the list of tests to run")
parser.addoption(
"--workspace",
"--ws",
action="store",
default=None,
help="Workspace path to store temp data generated during the tests")
parser.addoption(
"--waives-file",
"-S",
action="store",
default=None,
help=
"Specify a file containing a list of waives, one per line. After filtering collected tests, Pytest will "
"apply the waive state specified by this file to the set of tests to be run."
)
parser.addoption(
"--output-dir",
"-O",
action="store",
default=None,
help=
"Directory to store test output. Should point to a new or existing empty directory."
)
parser.addoption(
"--test-prefix",
"-P",
action="store",
default=None,
help=
"It is useful when using such prefix to mapping waive lists for specific GPU, such as 'GH200'"
)
parser.addoption("--regexp",
"-R",
action='store',
default=None,
help="A regexp to specify which tests to run")
parser.addoption(
"--apply-test-list-correction",
"-C",
action='store_true',
help=
"Attempt to automatically correct invalid test names in filter files and print the correct name in terminal. "
"If the correct name cannot be determined, the invalid test name will be printed to the terminal as well."
)
parser.addoption("--perf",
action="store_true",
help="'--perf' will run perf tests")
parser.addoption(
"--perf-log-formats",
help=
"Supply either 'yaml' or 'csv' as values. Supply multiple same flags for multiple formats.",
action="append",
default=[])
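# Hypothetical invocation exercising the custom options above (all paths are placeholders):
#
#   pytest defs/ --test-list=my_tests.txt --waives-file=waives.txt \
#       --output-dir=/tmp/test-output --test-prefix=GH200 \
#       --perf --perf-log-formats csv --perf-log-formats yaml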
@pytest.hookimpl(trylast=True)
def pytest_generate_tests(metafunc: pytest.Metafunc):
if metafunc.definition.function.__name__ != 'test_unittests_v2':
return
testlist_path = metafunc.config.getoption("--test-list")
if not testlist_path:
return
with open(testlist_path, "r") as f:
lines = f.readlines()
lines = preprocess_test_list_lines(testlist_path, lines)
uts = []
for line in lines:
if line.startswith("unittest/"):
uts.append(line.strip())
metafunc.parametrize("case", uts, ids=lambda x: x)
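# Sketch of a test-list file as consumed above (entries are illustrative): only lines that start
# with "unittest/" become parameters of test_unittests_v2; all other entries go through the
# regular test-list filtering in pytest_collection_modifyitems, e.g.
#
#   unittest/trt/functional
#   examples/test_llama.py::test_llm_llama_1gpu[llama-7b]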
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_collection_modifyitems(session, config, items):
testlist_path = config.getoption("--test-list")
waives_file = config.getoption("--waives-file")
test_prefix = config.getoption("--test-prefix")
perf_test = config.getoption("--perf")
if perf_test:
global ALL_PYTEST_ITEMS
ALL_PYTEST_ITEMS = None
import copy
# Do not import at global level since that would create cyclic imports.
from .perf.test_perf import generate_perf_tests
# Perf tests are generated based on the test list to speed up the test collection time.
items = generate_perf_tests(session, config, items)
ALL_PYTEST_ITEMS = copy.copy(items)
if test_prefix:
# Override the internal nodeid of each item to contain the correct test prefix.
# This is needed for reporting to correctly process the test name in order to bucket
# it into the appropriate test suite.
for item in items:
item._nodeid = "{}/{}".format(test_prefix, item._nodeid)
regexp = config.getoption("--regexp")
if testlist_path:
modify_by_test_list(testlist_path, items, config)
if regexp is not None:
deselect_by_regex(regexp, items, test_prefix, config)
if waives_file:
apply_waives(waives_file, items, config)
# We have to remove prefix temporarily before splitting the test list
# After that change back the test id.
for item in items:
if test_prefix and item._nodeid.startswith(f"{test_prefix}/"):
item._nodeid = item._nodeid[len(f"{test_prefix}/"):]
yield
for item in items:
if test_prefix:
item._nodeid = f"{test_prefix}/{item._nodeid}"
def deselect_by_regex(regexp, items, test_prefix, config):
"""Filter out tests based on the patterns specified in the given list of regular expressions.
If a test matches *any* of the expressions in the list it is considered selected."""
compiled_regexes = []
regex_list = []
r = re.compile(regexp)
compiled_regexes.append(r)
regex_list.append(regexp)
selected = []
deselected = []
corrections = get_test_name_corrections_v2(set(regex_list),
set(it.nodeid for it in items),
TestCorrectionMode.REGEX)
handle_corrections(corrections, test_prefix)
for item in items:
found = False
for regex in compiled_regexes:
if regex.search(item.nodeid):
found = True
break
if found:
selected.append(item)
else:
deselected.append(item)
if deselected:
config.hook.pytest_deselected(items=deselected)
items[:] = selected
@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item, call):
outcome = yield
report = outcome.get_result()
if call.when == "call":
report.file = str(item.fspath)
report.line = str(item.location[1])
report.url = ""
@pytest.fixture(scope="session")
def all_pytest_items():
"""
Provides all pytest items available in the current test definitions, before any
filtering has been applied.
"""
return ALL_PYTEST_ITEMS
@pytest.fixture(scope="session")
def turtle_root():
return os.path.dirname(os.path.dirname(__file__))
@pytest.fixture(scope="function")
def test_case(request, llm_root):
"get test case"
test_cases_file = "tests/integration/defs/test_cases.yml"
input_file_dir = "tests/integration/test_input_files"
test_cases_file_path = os.path.join(llm_root, test_cases_file)
case_name = request.param
with open(test_cases_file_path, 'r', encoding='UTF-8') as file:
test_cases = yaml.safe_load(file)
case = test_cases["test_cases"][case_name]
input_file = case["input_file"]
case["input_file"] = os.path.join(llm_root, input_file_dir, input_file)
return case
def check_nvlink():
"check nvlink status"
with tempfile.TemporaryDirectory() as temp_dirname:
try:
suffix = ".exe" if is_windows() else ""
# TODO: Use NRSU because we can't assume nvidia-smi across all platforms.
cmd = " ".join(["nvidia-smi" + suffix, "nvlink", "-s", "-i", "0"])
output = check_output(cmd, shell=True, cwd=temp_dirname)
except sp.CalledProcessError:
return False
if len(output.strip()) == 0:
return False
return "inActive" not in output.strip()
skip_nvlink_inactive = pytest.mark.skipif(not check_nvlink(),
                                          reason="nvlink is inactive.")
@pytest.fixture(scope="function")
def eval_venv(llm_venv):
"set UCC_TEAM_IDS_POOL_SIZE=1024"
llm_venv._new_env["UCC_TEAM_IDS_POOL_SIZE"] = "1024"
yield llm_venv
llm_venv._new_env.pop("UCC_TEAM_IDS_POOL_SIZE")
def get_host_total_memory():
"get host memory Mib"
memory = psutil.virtual_memory().total
return int(memory / 1024 / 1024)
@pytest.fixture(autouse=True)
def skip_by_host_memory(request):
"fixture for skip less host memory"
if request.node.get_closest_marker('skip_less_host_memory'):
host_memory = get_host_total_memory()
expected_memory = request.node.get_closest_marker(
'skip_less_host_memory').args[0]
if expected_memory > int(host_memory):
pytest.skip(
f'Host memory {host_memory} is less than {expected_memory}')
IS_UNDER_CI_ENV = 'JENKINS_HOME' in os.environ
def collect_status():
if not IS_UNDER_CI_ENV:
return
import psutil
import pynvml
pynvml.nvmlInit()
handles = {
idx: pynvml.nvmlDeviceGetHandleByIndex(idx)
for idx in range(pynvml.nvmlDeviceGetCount())
}
gpu_memory = {}
for idx, device in handles.items():
total_used = pynvml.nvmlDeviceGetMemoryInfo(device).used // 1024 // 1024
total = pynvml.nvmlDeviceGetMemoryInfo(device).total // 1024 // 1024
detail = pynvml.nvmlDeviceGetComputeRunningProcesses(device)
process = {}
for entry in detail:
host_memory_in_mbs = -1
try:
host_memory_in_mbs = psutil.Process(
entry.pid).memory_full_info().uss // 1024 // 1024
process[entry.pid] = (entry.usedGpuMemory // 1024 // 1024,
host_memory_in_mbs)
            except Exception:
                # The process may have exited or be inaccessible; ignore it.
                pass
gpu_memory[idx] = {
"total_used": total_used,
'total': total,
"process": process
}
print('\nCurrent memory status:')
print(gpu_memory)
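# The printed status is a plain dict keyed by GPU index; an illustrative (made-up) example:
#
#   {0: {'total_used': 512, 'total': 81559, 'process': {12345: (498, 10240)}}}
#
# where each per-PID tuple is (GPU memory in MiB, host USS memory in MiB).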
@pytest.hookimpl(wrapper=True)
def pytest_runtest_protocol(item, nextitem):
ret = yield
collect_status()
return ret
@pytest.fixture(scope="function")
def deterministic_test_root(llm_root, llm_venv):
"Get deterministic test root"
deterministic_root = os.path.join(llm_root,
"tests/integration/defs/deterministic")
return deterministic_root
@pytest.fixture(scope="function")
def disaggregated_test_root(llm_root, llm_venv):
"Get disaggregated test root"
disaggregated_root = os.path.join(llm_root,
"tests/integration/defs/disaggregated")
return disaggregated_root