# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-

import datetime
import os
import re
import shutil
import subprocess as sp
import tempfile
import time
import urllib.request
import warnings
from functools import wraps
from pathlib import Path
from typing import Iterable, Sequence

import defs.ci_profiler
import psutil
import pytest
import torch
import tqdm
import yaml
from _pytest.mark import ParameterSet

from tensorrt_llm.bindings import ipc_nvls_supported

from .perf.gpu_clock_lock import GPUClockLock
from .perf.session_data_writer import SessionDataWriter
from .test_list_parser import (TestCorrectionMode, apply_waives,
                               get_test_name_corrections_v2, handle_corrections,
                               modify_by_test_list, preprocess_test_list_lines)
from .trt_test_alternative import (call, check_output, exists, is_windows,
                                   is_wsl, makedirs, print_info, print_warning,
                                   wsl_to_win_path)

try:
    from llm import trt_environment
except ImportError:
    trt_environment = None

# TODO: turn this off when the nightly storage issue is resolved.
DEBUG_CI_STORAGE = os.environ.get("DEBUG_CI_STORAGE", False)
GITLAB_API_USER = os.environ.get("GITLAB_API_USER")
GITLAB_API_TOKEN = os.environ.get("GITLAB_API_TOKEN")


def print_storage_usage(path, tag, capfd):
    if DEBUG_CI_STORAGE:
        stat = shutil.disk_usage(path)
        with capfd.disabled():
            print_info(
                f"\nUsage of {path} {stat} @{tag}, used in GB: {stat.used/(2**30)}"
            )


def wget(url, out):
    filename = os.path.basename(url)
    os.makedirs(out, exist_ok=True)
    urllib.request.urlretrieve(url, os.path.join(out, filename))
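
# Illustrative only (hypothetical URL and output directory): `wget` keeps the
# remote file's basename and creates the target directory if needed, so the
# call below would download to "workspace/datasets/sample.json".
#     wget("https://example.com/data/sample.json", out="workspace/datasets")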


def llm_models_root() -> str:
    '''Return the LLM_MODELS_ROOT path.

    The path is taken from the LLM_MODELS_ROOT env var when it is set;
    otherwise it falls back to the default NFS location under
    /scratch.trt_llm_data.
    '''
    DEFAULT_LLM_MODEL_ROOT = os.path.join("/scratch.trt_llm_data", "llm-models")
    LLM_MODELS_ROOT = os.environ.get("LLM_MODELS_ROOT", DEFAULT_LLM_MODEL_ROOT)

    return LLM_MODELS_ROOT
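
# For example (hypothetical path), pointing the suite at a local model cache:
#     LLM_MODELS_ROOT=/data/llm-models pytest tests/integration
# makes every model/dataset fixture below resolve its paths under
# /data/llm-models instead of the default NFS location.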


def tests_path() -> Path:
    return (Path(os.path.dirname(__file__)) / "../..").resolve()


def unittest_path() -> Path:
    return tests_path() / "unittest"


def integration_path() -> Path:
    return tests_path() / "integration"

def cached_in_llm_models_root(path_relative_to_llm_models_root,
                              fail_if_path_is_invalid=False):
    '''
    Use this decorator to declare a cached path in the LLM_MODELS_ROOT directory.

    This decorator is intended to be used with pytest.fixture functions which prepare and return a data path for some tests.

    The cache is only queried when llm_models_root() does not return None, and the cache is skipped otherwise.
    When the cache is queried and the specified path does not exist, the function:

    - Triggers an assertion failure when fail_if_path_is_invalid is True,
    - Ignores the invalid path and falls back to calling the fixture otherwise.

    The purpose of `fail_if_path_is_invalid` is the following:
    - If you submit a test and the data is not in the cached NFS LLM_MODELS_ROOT dir yet, you can use `fail_if_path_is_invalid=False` (the default).
      In that case, the fixture will use the fallback path and ignore the cache miss in the CI. After submitting the data to the cached NFS LLM_MODELS_ROOT dir,
      your test will automatically pick up the cached data.

    - If your data is known to always be in the LLM_MODELS_ROOT, and you want to make sure that the test fails loudly when it misses in cache,
      you should specify fail_if_path_is_invalid=True to force the failure. It is useful when a cache miss would cause a big performance drop for the CI jobs.

    Example:
    If you have a fixture which downloads the SantaCoder repo and returns its path for one SantaCoder test, you can do the following:

    @pytest.fixture(scope="session")
    def llm_gpt2_santacoder_model_root(llm_venv):
        workspace = llm_venv.get_working_directory()
        gpt2_santacoder_model_root = os.path.join(workspace, "santacoder")
        call(
            f"git clone https://huggingface.co/bigcode/santacoder {gpt2_santacoder_model_root}",
            shell=True)
        return gpt2_santacoder_model_root

    At some point, if you decide to cache the SantaCoder in the LLM_MODELS_ROOT, you can decorate the fixture to enforce the test to
    use the ${LLM_MODELS_ROOT}/santacoder cached directory. You can upload SantaCoder to that location before or after submitting
    this code since there is a fallback path to clone the repo if it is not found in cache.

    @pytest.fixture(scope="session")
    @cached_in_llm_models_root("santacoder")
    def llm_gpt2_santacoder_model_root(llm_venv):
        ... keep the original code
    '''

    def wrapper(f):

        @wraps(f)
        def decorated(*args, **kwargs):
            if llm_models_root() is not None:
                cached_dir = f"{llm_models_root()}/{path_relative_to_llm_models_root}"
                if os.path.exists(cached_dir):
                    return cached_dir
                elif fail_if_path_is_invalid:
                    assert False, f"{cached_dir} does not exist, and fail_if_path_is_invalid is True, please check the cache directory"
            return f(*args, **kwargs)

        return decorated

    return wrapper

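# A minimal sketch of the strict-cache pattern used by several fixtures below
# (the fixture name here is illustrative): with fail_if_path_is_invalid=True the
# decorator either returns the cached directory or asserts, so the wrapped body
# only runs on an unexpected cache miss and simply raises.
#
# @pytest.fixture(scope="module")
# @cached_in_llm_models_root("some-cached-model", True)
# def llm_some_cached_model_root():
#     raise RuntimeError("some-cached-model must be cached")
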
# Fixture telling whether the current pipeline is running in the TRT environment.
@pytest.fixture(scope="session")
def is_trt_environment():
    return trt_environment is not None


# Helper function to get llm_root. Do not define it as a fixture so that this
# function can be used during the test collection phase.
def get_llm_root(trt_config=None, gitlab_token=None):
    if trt_environment:
        return trt_environment.setup_tensorrt_llm_repo(trt_config, gitlab_token)
    llm_repo_root = os.environ.get("LLM_ROOT", None)
    if llm_repo_root is None:
        llm_repo_root = os.path.dirname(
            os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
        print_warning(
            f"The LLM_ROOT env var is not defined! Using {llm_repo_root} as LLM_ROOT."
        )
    return llm_repo_root


@pytest.fixture(scope="session")
def llm_root():
    return get_llm_root()


@pytest.fixture(scope="session")
def llm_backend_root():
    llm_root_directory = get_llm_root()
    llm_backend_repo_root = os.path.join(llm_root_directory, "triton_backend")
    return llm_backend_repo_root

@pytest.fixture(scope="session")
def llm_datasets_root() -> str:
    return os.path.join(llm_models_root(), "datasets")


@pytest.fixture(scope="session")
def llm_rouge_root() -> str:
    return os.path.join(llm_models_root(), "rouge")


@pytest.fixture(scope="module")
def bert_example_root(llm_root):
    "Get bert example root"
    example_root = os.path.join(llm_root, "examples", "models", "core", "bert")

    return example_root


@pytest.fixture(scope="module")
def enc_dec_example_root(llm_root):
    "Get encoder-decoder example root"
    example_root = os.path.join(llm_root, "examples", "models", "core",
                                "enc_dec")

    return example_root

@pytest.fixture(scope="module")
def whisper_example_root(llm_root, llm_venv):
    "Get whisper example root"
    example_root = os.path.join(llm_root, "examples", "models", "core",
                                "whisper")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])
    return example_root


@pytest.fixture(scope="module")
def opt_example_root(llm_root, llm_venv):
    "Get opt example root"

    example_root = os.path.join(llm_root, "examples", "models", "contrib",
                                "opt")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    return example_root


@pytest.fixture(scope="module")
def llama_example_root(llm_root, llm_venv):
    "Get llama example root"

    example_root = os.path.join(llm_root, "examples", "models", "core", "llama")
    try:
        llm_venv.run_cmd([
            "-m", "pip", "install", "-r",
            os.path.join(example_root, "requirements.txt")
        ])
    except Exception:
        print("pip install of llama example requirements failed!")

    return example_root

@pytest.fixture(scope="module")
def llmapi_example_root(llm_root, llm_venv):
    "Get llm api example root"

    example_root = os.path.join(llm_root, "examples", "llm-api")

    return example_root


@pytest.fixture(scope="module")
def disaggregated_example_root(llm_root, llm_venv):
    "Get disaggregated example root"

    example_root = os.path.join(llm_root, "examples", "disaggregated")

    return example_root

@pytest.fixture(scope="module")
def gemma_example_root(llm_root, llm_venv):
    "Get gemma example root"

    example_root = os.path.join(llm_root, "examples", "models", "core", "gemma")
    # https://nvbugs/4559583 The Jax dependency broke the entire pipeline in the TRT container
    # due to its incompatibility with torch, which forced a reinstall of everything
    # and caused the pipeline to fail. We manually install the gemma dependencies as a WAR.
    llm_venv.run_cmd(["-m", "pip", "install", "safetensors~=0.4.1", "nltk"])
    # Install Jax manually because of the dependency conflict described above.
    import platform
    google_extension = [
        "-f",
        "https://storage.googleapis.com/jax-releases/jax_cuda_releases.html"
    ]

    # WAR the new posting of "nvidia-cudnn-cu12~=9.0".
    # "jax[cuda12_pip]~=0.4.19" specifies "nvidia-cudnn-cu12>=8.9" but actually requires "nvidia-cudnn-cu12~=8.9".
    if "x86_64" in platform.machine():
        llm_venv.run_cmd(["-m", "pip", "install", "nvidia-cudnn-cu12~=8.9"])

    if "Windows" in platform.system():
        llm_venv.run_cmd([
            "-m", "pip", "install", "jax~=0.4.19", "jaxlib~=0.4.19", "--no-deps"
        ] + google_extension)
    else:
        llm_venv.run_cmd([
            "-m", "pip", "install", "jax[cuda12_pip]~=0.4.19",
            "jaxlib[cuda12_pip]~=0.4.19", "--no-deps"
        ] + google_extension)
    llm_venv.run_cmd(["-m", "pip", "install", "flax~=0.8.0"])
    return example_root

@pytest.fixture(scope="function")
def gemma_model_root(request):
    "Get gemma model root"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"

    if hasattr(request, "param"):
        gemma_model_root = os.path.join(models_root, f"gemma/{request.param}")

    assert exists(gemma_model_root), f"{gemma_model_root} does not exist!"

    return gemma_model_root


@pytest.fixture(scope="function")
def minitron_model_root(request):
    "Get minitron model root"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"

    if hasattr(request, "param"):
        assert request.param == "4b"
        minitron_model_root = os.path.join(models_root,
                                           "nemotron/Minitron-4B-Base")

    assert exists(minitron_model_root), f"{minitron_model_root} does not exist!"

    return minitron_model_root


@pytest.fixture(scope="function")
def mistral_nemo_model_root(request):
    "Get Mistral Nemo model root"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    if hasattr(request, "param"):
        assert request.param == "Mistral-Nemo-12b-Base"
        mistral_nemo_model_root = os.path.join(models_root,
                                               "Mistral-Nemo-Base-2407")
    assert exists(
        mistral_nemo_model_root), f"{mistral_nemo_model_root} does not exist!"
    return mistral_nemo_model_root


@pytest.fixture(scope="function")
def mistral_nemo_minitron_model_root(request):
    "Get Mistral Nemo Minitron model root"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    if hasattr(request, "param"):
        assert request.param == "Mistral-NeMo-Minitron-8B-Instruct"
        mistral_nemo_minitron_model_root = os.path.join(
            models_root, "Mistral-NeMo-Minitron-8B-Instruct")
    assert exists(mistral_nemo_minitron_model_root
                  ), f"{mistral_nemo_minitron_model_root} does not exist!"
    return mistral_nemo_minitron_model_root

@pytest.fixture(scope="module")
def gpt_example_root(llm_root, llm_venv):
    "Get gpt example root"
    example_root = os.path.join(llm_root, "examples", "models", "core", "gpt")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    return example_root


@pytest.fixture(scope="module")
def gptj_example_root(llm_root, llm_venv):
    "Get gptj example root"
    example_root = os.path.join(llm_root, "examples", "models", "contrib",
                                "gptj")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    return example_root


@pytest.fixture(scope="module")
def glm_4_9b_example_root(llm_root, llm_venv):
    "Get glm-4-9b example root"
    example_root = os.path.join(llm_root, "examples", "models", "core",
                                "glm-4-9b")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    return example_root


@pytest.fixture(scope="module")
def exaone_example_root(llm_root, llm_venv):
    "Get EXAONE example root"
    example_root = os.path.join(llm_root, "examples", "models", "core",
                                "exaone")

    return example_root

@pytest.fixture(scope="function")
def llm_exaone_model_root(request) -> str:
    "Get EXAONE model root"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"

    exaone_model_root = os.path.join(models_root, "exaone")
    if hasattr(request, "param"):
        if request.param == "exaone_3.0_7.8b_instruct":
            exaone_model_root = os.path.join(models_root, "exaone")
        elif request.param == "exaone_deep_2.4b":
            exaone_model_root = os.path.join(models_root, "EXAONE-Deep-2.4B")

    return exaone_model_root


@pytest.fixture(scope="module")
def falcon_example_root(llm_root, llm_venv):
    "Get falcon example root"
    example_root = os.path.join(llm_root, "examples", "models", "contrib",
                                "falcon")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    return example_root


@pytest.fixture(scope="session")
def plugin_gen_path(llm_root):
    "Path to the plugin_gen.py script"
    return os.path.join(llm_root, "tensorrt_llm", "tools", "plugin_gen",
                        "plugin_gen.py")

@pytest.fixture(scope="module")
def internlm2_example_root(llm_root, llm_venv):
    "Get internlm2 example root"
    example_root = os.path.join(llm_root, "examples", "models", "core",
                                "internlm2")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    return example_root


@pytest.fixture(scope="module")
def qwen_example_root(llm_root, llm_venv):
    "Get qwen example root"
    example_root = os.path.join(llm_root, "examples", "models", "core", "qwen")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    return example_root


@pytest.fixture(scope="module")
def draft_target_model_example_root(llm_root, llm_venv):
    "Get Draft-Target-Model example root"
    example_root = os.path.join(llm_root, "examples", "draft_target_model")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    return example_root


@pytest.fixture(scope="module")
def prompt_lookup_example_root(llm_root, llm_venv):
    "Get Prompt-Lookup example root"
    example_root = os.path.join(llm_root, "examples", "prompt_lookup")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    return example_root

@pytest.fixture(scope="module")
def medusa_example_root(llm_root, llm_venv):
    "Get medusa example root"
    example_root = os.path.join(llm_root, "examples", "medusa")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    return example_root


@pytest.fixture(scope="module")
def redrafter_example_root(llm_root, llm_venv):
    "Get ReDrafter example root"
    example_root = os.path.join(llm_root, "examples", "redrafter")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    return example_root


@pytest.fixture(scope="module")
def eagle_example_root(llm_root, llm_venv):
    "Get EAGLE example root"
    example_root = os.path.join(llm_root, "examples", "eagle")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    return example_root


@pytest.fixture(scope="module")
def mamba_example_root(llm_root, llm_venv):
    "Get mamba example root"
    example_root = os.path.join(llm_root, "examples", "models", "core", "mamba")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    yield example_root

    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(llm_root, "requirements.txt")
    ])

@pytest.fixture(scope="module")
def recurrentgemma_example_root(llm_root, llm_venv):
    "Get recurrentgemma example root"
    example_root = os.path.join(llm_root, "examples", "models", "core",
                                "recurrentgemma")

    # install requirements
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    yield example_root

    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(llm_root, "requirements.txt")
    ])


@pytest.fixture(scope="module")
def nemotron_nas_example_root(llm_root, llm_venv):
    example_root = os.path.join(llm_root, "examples", "models", "core",
                                "nemotron_nas")

    yield example_root

@pytest.fixture(scope="module")
def nemotron_example_root(llm_root, llm_venv):
    "Get nemotron example root"
    example_root = os.path.join(llm_root, "examples", "models", "core",
                                "nemotron")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])
    return example_root


@pytest.fixture(scope="module")
def commandr_example_root(llm_root, llm_venv):
    "Get commandr example root"
    example_root = os.path.join(llm_root, "examples", "models", "core",
                                "commandr")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    return example_root


@pytest.fixture(scope="module")
def deepseek_v2_example_root(llm_root, llm_venv):
    "Get deepseek v2 example root"
    example_root = os.path.join(llm_root, "examples", "models", "contrib",
                                "deepseek_v2")
    llm_venv.run_cmd([
        "-m", "pip", "install", "-r",
        os.path.join(example_root, "requirements.txt")
    ])

    return example_root

@pytest.fixture(scope="function")
def deepseek_v3_model_root(request):
    models_root = llm_models_root()
    if request.param == "DeepSeek-V3":
        deepseek_v3_model_root = os.path.join(models_root, "DeepSeek-V3")
    elif request.param == "DeepSeek-V3-Lite-bf16":
        deepseek_v3_model_root = os.path.join(models_root, "DeepSeek-V3-Lite",
                                              "bf16")
    elif request.param == "DeepSeek-V3-Lite-fp8":
        deepseek_v3_model_root = os.path.join(models_root, "DeepSeek-V3-Lite",
                                              "fp8")
    elif request.param == "DeepSeek-V3-Lite-nvfp4_moe_only":
        deepseek_v3_model_root = os.path.join(models_root, "DeepSeek-V3-Lite",
                                              "nvfp4_moe_only")
    assert exists(
        deepseek_v3_model_root), f"{deepseek_v3_model_root} does not exist!"
    return deepseek_v3_model_root

@pytest.fixture(scope="session")
def trt_performance_cache_name():
    return "performance.cache"


@pytest.fixture(scope="session")
def trt_performance_cache_fpath(llm_venv, trt_performance_cache_name):
    workspace = llm_venv.get_working_directory()
    fpath = os.path.join(workspace, trt_performance_cache_name)
    if is_wsl():
        return wsl_to_win_path(fpath)
    return fpath


# Get the executing perf case name
@pytest.fixture(autouse=True)
def perf_case_name(request):
    return request.node.nodeid


@pytest.fixture(scope="session")
def output_dir(request):
    output = request.config.getoption("--output-dir")
    if output:
        os.makedirs(str(output), exist_ok=True)
    return output

@pytest.fixture(scope="session")
def trt_gpu_clock_lock(request):
    """
    Fixture for the GPUClockLock, used to interface with pynvml to get system properties and to lock/monitor GPU clocks.
    """
    gpu_list = get_gpu_device_list()
    gpu_ids = [gpu.split()[1][:-1] for gpu in gpu_list]  # Extract GPU IDs
    gpu_ids_str = ",".join(gpu_ids)
    gpu_clock_lock = GPUClockLock(
        gpu_id=gpu_ids_str,
        interval_ms=1000.0,
    )

    yield gpu_clock_lock

    gpu_clock_lock.teardown()


@pytest.fixture(scope="session")
def llm_session_data_writer(request, trt_gpu_clock_lock, output_dir):
    """
    Fixture for the SessionDataWriter, used to write session data to the output directory.
    """
    session_data_writer = SessionDataWriter(
        log_output_directory=output_dir,
        output_formats=request.config.getoption("--perf-log-formats"),
        gpu_clock_lock=trt_gpu_clock_lock,
    )

    yield session_data_writer

    session_data_writer.teardown()

@pytest.fixture(scope="session")
def custom_user_workspace(request):
    return request.config.getoption("--workspace")


@pytest.fixture(scope="session")
def llm_venv(llm_root, custom_user_workspace):
    workspace_dir = custom_user_workspace
    subdir = datetime.datetime.now().strftime("ws-%Y-%m-%d-%H-%M-%S")
    if workspace_dir is None:
        workspace_dir = "llm-test-workspace"
    workspace_dir = os.path.join(workspace_dir, subdir)
    from defs.local_venv import PythonVenvRunnerImpl
    venv = PythonVenvRunnerImpl("", "", "python3",
                                os.path.join(os.getcwd(), workspace_dir))
    yield venv
    # Remove the workspace directory
    if os.path.exists(workspace_dir):
        print(f"Cleaning up workspace: {workspace_dir}")
        try:
            shutil.rmtree(workspace_dir)
        except Exception as e:
            print(f"Failed to clean up workspace: {e}")

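# Illustrative only: fixtures and tests in this suite drive the venv through
# llm_venv.run_cmd(), which receives python3 arguments. A test (hypothetical
# name and script path) could install example requirements and invoke a script
# roughly like this:
#
# def test_example_smoke(llm_venv, llama_example_root):
#     llm_venv.run_cmd([
#         "-m", "pip", "install", "-r",
#         os.path.join(llama_example_root, "requirements.txt")
#     ])
#     llm_venv.run_cmd([os.path.join(llama_example_root, "some_script.py")])
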
@pytest.fixture(scope="session")
@cached_in_llm_models_root("gpt-next/megatron_converted_843m_tp1_pp1.nemo",
                           True)
def gpt_next_root():
    "get gpt-next/megatron_converted_843m_tp1_pp1.nemo"
    raise RuntimeError("megatron_converted_843m_tp1_pp1.nemo must be cached")


@pytest.fixture(scope="function")
def bert_model_root(hf_bert_model_root):
    "Get bert model root"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"

    bert_model_name = hf_bert_model_root
    bert_model_root = os.path.join(models_root, bert_model_name)

    assert os.path.exists(
        bert_model_root
    ), f"{bert_model_root} does not exist under NFS LLM_MODELS_ROOT dir"

    return (bert_model_name, bert_model_root)

@pytest.fixture(scope="function")
def enc_dec_model_root(request):
    "Get enc-dec model root"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"

    tllm_model_name = request.param
    if "wmt" not in tllm_model_name:
        # HuggingFace root
        enc_dec_model_root = os.path.join(models_root, tllm_model_name)
    else:
        # FairSeq root
        enc_dec_model_root = os.path.join(models_root, "fairseq-models",
                                          tllm_model_name)

    assert os.path.exists(
        enc_dec_model_root
    ), f"{enc_dec_model_root} does not exist under NFS LLM_MODELS_ROOT dir"

    return (tllm_model_name, enc_dec_model_root)


@pytest.fixture(scope="function")
def whisper_model_root(request):
    "Get whisper model root"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    assert request.param in [
        "large-v2", "large-v3"
    ], "whisper only supports large-v2 or large-v3 for now"
    tllm_model_name = request.param
    whisper_model_root = os.path.join(models_root, "whisper-models",
                                      tllm_model_name)
    assert os.path.exists(
        whisper_model_root
    ), f"{whisper_model_root} does not exist under NFS LLM_MODELS_ROOT dir"

    return (tllm_model_name, whisper_model_root)


@pytest.fixture(scope="function")
def whisper_example_audio_file(whisper_model_root):
    return os.path.join(whisper_model_root[1], "1221-135766-0002.wav")

@pytest.fixture(scope="function")
def multimodal_model_root(request, llm_venv):
    "Get multimodal model root"
    models_root = os.path.join(llm_models_root(), 'multimodals')
    assert models_root, "Did you set LLM_MODELS_ROOT?"

    tllm_model_name = request.param
    if 'VILA' in tllm_model_name:
        models_root = os.path.join(llm_models_root(), 'vila')
    if 'cogvlm-chat' in tllm_model_name:
        models_root = os.path.join(llm_models_root(), 'cogvlm-chat')
    if 'video-neva' in tllm_model_name:
        models_root = os.path.join(llm_models_root(), 'video-neva')
        tllm_model_name = tllm_model_name + ".nemo"
    if 'neva-22b' in tllm_model_name:
        models_root = os.path.join(llm_models_root(), 'neva')
        tllm_model_name = tllm_model_name + ".nemo"
    elif 'Llama-3.2' in tllm_model_name:
        models_root = os.path.join(llm_models_root(), 'llama-3.2-models')
    elif 'Mistral-Small' in tllm_model_name:
        models_root = llm_models_root()

    multimodal_model_root = os.path.join(models_root, tllm_model_name)

    # The on-disk directory does not carry the "-video"/"-vision-trtllm" suffix
    # used in the test parameter, so strip it from the joined path.
    if 'llava-onevision' in tllm_model_name and 'video' in tllm_model_name:
        multimodal_model_root = multimodal_model_root[:-6]
    elif 'llava-v1.6' in tllm_model_name and 'vision-trtllm' in tllm_model_name:
        multimodal_model_root = multimodal_model_root[:-14]

    assert os.path.exists(
        multimodal_model_root
    ), f"{multimodal_model_root} does not exist under NFS LLM_MODELS_ROOT dir"

    yield (tllm_model_name, multimodal_model_root)

    if 'llava-onevision' in tllm_model_name:
        llm_venv.run_cmd(['-m', 'pip', 'uninstall', 'llava', '-y'])


def remove_file(fn):
    if os.path.isfile(fn) or os.path.islink(fn):
        os.remove(fn)

@pytest.fixture(scope="module")
@cached_in_llm_models_root("replit-code-v1_5-3b", True)
def llm_replit_code_v1_5_3b_model_root():
    "Get replit-code-v1_5-3b model root"
    raise RuntimeError("replit-code-v1_5-3b must be cached")


@pytest.fixture(scope="module")
@cached_in_llm_models_root("gpt2", True)
def llm_gpt2_model_root():
    "Get gpt2 model root"
    raise RuntimeError("gpt2 must be cached")


@pytest.fixture(scope="module")
@cached_in_llm_models_root("gpt2-medium", True)
def llm_gpt2_medium_model_root():
    "Get gpt2 medium model root"
    raise RuntimeError("gpt2-medium must be cached")


@pytest.fixture(scope="module")
@cached_in_llm_models_root("GPT-2B-001_bf16_tp1.nemo", True)
def llm_gpt2_next_model_root():
    "get gpt-2b-001_bf16_tp1.nemo"
    raise RuntimeError("GPT-2B-001_bf16_tp1.nemo must be cached")


@pytest.fixture(scope="module")
@cached_in_llm_models_root("santacoder", True)
def llm_gpt2_santacoder_model_root():
    "get santacoder data"
    raise RuntimeError("santacoder must be cached")

@pytest.fixture(scope="module")
def llm_gpt2_starcoder_model_root(llm_venv, request):
    "get starcoder-model"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    starcoder_model_root = os.path.join(models_root, "starcoder-model")
    if hasattr(request, "param"):
        if request.param == "starcoder":
            starcoder_model_root = os.path.join(models_root, "starcoder-model")
        elif request.param == "starcoderplus":
            starcoder_model_root = os.path.join(models_root, "starcoderplus")
        elif request.param == "starcoder2":
            starcoder_model_root = os.path.join(models_root, "starcoder2-model")

    return starcoder_model_root


@pytest.fixture(scope="module")
@cached_in_llm_models_root("starcoder2-3b", True)
def llm_gpt2_starcoder2_model_root():
    "get starcoder2-3b"
    raise RuntimeError("starcoder2-3b must be cached")


@pytest.fixture(scope="function")
def starcoder_model_root(request):
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    if request.param == "starcoder":
        starcoder_model_root = os.path.join(models_root, "starcoder-model")
    elif request.param == "starcoder2-15b":
        starcoder_model_root = os.path.join(models_root, "starcoder2-model")
    elif request.param == "starcoder2-3b":
        starcoder_model_root = os.path.join(models_root, "starcoder2-3b")
    elif request.param == "starcoderplus":
        starcoder_model_root = os.path.join(models_root, "starcoderplus")

    assert os.path.exists(
        starcoder_model_root
    ), f"{starcoder_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
    return starcoder_model_root

@pytest.fixture(scope="function")
def llm_gpt2b_lora_model_root(request):
    "get gpt2b lora model"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    model_root_list = []
    lora_root = os.path.join(models_root, "lora", "gpt-next-2b")
    if hasattr(request, "param"):
        if isinstance(request.param, tuple):
            model_list = list(request.param)
        else:
            model_list = [request.param]

        for item in model_list:
            if item == "gpt2b_lora-900.nemo":
                model_root_list.append(
                    os.path.join(lora_root, "gpt2b_lora-900.nemo"))
            elif item == "gpt2b_lora-stories.nemo":
                model_root_list.append(
                    os.path.join(lora_root, "gpt2b_lora-stories.nemo"))

    return ",".join(model_root_list)

@pytest.fixture(scope="module")
def llama_tokenizer_model_root():
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    # Use llama-7b-hf to load the tokenizer
    llama_tokenizer_model_root = os.path.join(models_root, "llama-models",
                                              "llama-7b-hf")
    return llama_tokenizer_model_root


@pytest.fixture(scope="module")
def llama_v2_tokenizer_model_root():
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    llama_v2_tokenizer_model_root = os.path.join(models_root, "llama-models-v2")

    assert os.path.exists(
        llama_v2_tokenizer_model_root
    ), f"{llama_v2_tokenizer_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
    return llama_v2_tokenizer_model_root

@pytest.fixture(scope="function")
def llama_model_root(request):
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    if request.param == "llama-7b":
        llama_model_root = os.path.join(models_root, "llama-models",
                                        "llama-7b-hf")
    elif request.param == "llama-30b":
        llama_model_root = os.path.join(models_root, "llama-models",
                                        "llama-30b-hf")
    elif request.param == "TinyLlama-1.1B-Chat-v1.0":
        llama_model_root = os.path.join(models_root, "llama-models-v2",
                                        "TinyLlama-1.1B-Chat-v1.0")
    elif request.param == "llama-v2-7b":
        llama_model_root = os.path.join(models_root, "llama-models-v2", "7B")
    elif request.param == "llama-v2-70b":
        llama_model_root = os.path.join(models_root, "llama-models-v2", "70B")
    elif request.param == "llama-v2-70b-hf":
        llama_model_root = os.path.join(models_root, "llama-models-v2",
                                        "llama-v2-70b-hf")
    elif request.param == "Llama-2-7B-AWQ":
        llama_model_root = os.path.join(models_root, "llama-models-v2",
                                        "Llama-2-7B-AWQ")
    elif request.param == "Llama-2-7B-GPTQ":
        llama_model_root = os.path.join(models_root, "llama-models-v2",
                                        "Llama-2-7B-GPTQ")
    elif request.param == "llama-v2-13b-hf":
        llama_model_root = os.path.join(models_root, "llama-models-v2",
                                        "llama-v2-13b-hf")
    elif request.param == "llama-v2-7b-hf":
        llama_model_root = os.path.join(models_root, "llama-models-v2",
                                        "llama-v2-7b-hf")
    elif request.param == "llama-v3-8b-hf":
        llama_model_root = os.path.join(models_root, "llama-models-v3", "8B")
    elif request.param == "llama-v3-8b-instruct-hf":
        llama_model_root = os.path.join(models_root, "llama-models-v3",
                                        "llama-v3-8b-instruct-hf")
    elif request.param == "Llama-3-8B-Instruct-Gradient-1048k":
        llama_model_root = os.path.join(models_root, "llama-models-v3",
                                        "Llama-3-8B-Instruct-Gradient-1048k")
    elif request.param == "Llama-3-70B-Instruct-Gradient-1048k":
        llama_model_root = os.path.join(models_root, "llama-models-v3",
                                        "Llama-3-70B-Instruct-Gradient-1048k")
    elif request.param == "llama-3.1-405b":
        llama_model_root = os.path.join(models_root, "llama-3.1-model",
                                        "Meta-Llama-3.1-405B")
    elif request.param == "llama-3.1-405b-fp8":
        llama_model_root = os.path.join(models_root, "llama-3.1-model",
                                        "Meta-Llama-3.1-405B-FP8")
    elif request.param == "llama-3.1-70b":
        llama_model_root = os.path.join(models_root, "llama-3.1-model",
                                        "Meta-Llama-3.1-70B")
    elif request.param == "llama-3.1-8b":
        llama_model_root = os.path.join(models_root, "llama-3.1-model",
                                        "Meta-Llama-3.1-8B")
    elif request.param == "llama-3.1-8b-instruct-hf-fp8":
        llama_model_root = os.path.join(models_root, "llama-3.1-model",
                                        "Llama-3.1-8B-Instruct-FP8")
    elif request.param == "llama-3.1-8b-hf-nvfp4":
        llama_model_root = os.path.join(models_root, "nvfp4-quantized",
                                        "Meta-Llama-3.1-8B")
    elif request.param == "llama-3.1-70b-instruct":
        llama_model_root = os.path.join(models_root, "llama-3.1-model",
                                        "Meta-Llama-3.1-70B-Instruct")
    elif request.param == "llama-3.2-1b":
        llama_model_root = os.path.join(models_root, "llama-3.2-models",
                                        "Llama-3.2-1B")
    elif request.param == "llama-3.2-3b":
        llama_model_root = os.path.join(models_root, "llama-3.2-models",
                                        "Llama-3.2-3B")
    assert os.path.exists(
        llama_model_root
    ), f"{llama_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
    return llama_model_root

@pytest.fixture(scope="function")
def code_llama_model_root(request):
    "get CodeLlama model data"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    if request.param == "CodeLlama-7b-Instruct":
        codellama_model_root = os.path.join(models_root, "codellama",
                                            "CodeLlama-7b-Instruct-hf")
    elif request.param == "CodeLlama-13b-Instruct":
        codellama_model_root = os.path.join(models_root, "codellama",
                                            "CodeLlama-13b-Instruct-hf")
    elif request.param == "CodeLlama-34b-Instruct":
        codellama_model_root = os.path.join(models_root, "codellama",
                                            "CodeLlama-34b-Instruct-hf")
    elif request.param == "CodeLlama-70b-hf":
        codellama_model_root = os.path.join(models_root, "codellama",
                                            "CodeLlama-70b-hf")
    return codellama_model_root

@pytest.fixture(scope="function")
def draft_target_model_roots(request):
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    draft_model_root = None
    target_model_root = None
    if request.param == "gpt2":
        draft_model_root = os.path.join(models_root, "gpt2-medium")
        target_model_root = os.path.join(models_root, "gpt2-medium")
    elif request.param == "llama_v2":
        draft_model_root = os.path.join(models_root,
                                        "llama-models-v2/llama-v2-7b-hf")
        target_model_root = os.path.join(models_root,
                                         "llama-models-v2/llama-v2-13b-hf")

    assert os.path.exists(
        draft_model_root
    ), f"Draft-Target-Model draft model path {draft_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
    assert os.path.exists(
        target_model_root
    ), f"Draft-Target-Model target model path {target_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
    return draft_model_root, target_model_root

@pytest.fixture(scope="function")
def prompt_lookup_root(request):
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    if request.param == "gpt2":
        models_root = os.path.join(models_root, "gpt2-medium")
    elif request.param == "llama_v2":
        models_root = os.path.join(models_root,
                                   "llama-models-v2/llama-v2-13b-hf")
    assert os.path.exists(
        models_root
    ), f"Prompt-Lookup model path {models_root} does not exist under NFS LLM_MODELS_ROOT dir"
    return models_root

@pytest.fixture(scope="function")
def medusa_model_roots(request):
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    base_model_root_for_medusa = None
    medusa_heads_model_root = None
    if request.param == "medusa-vicuna-7b-v1.3":
        base_model_root_for_medusa = os.path.join(models_root, "vicuna-7b-v1.3")
        medusa_heads_model_root = os.path.join(models_root,
                                               "medusa-vicuna-7b-v1.3")
    elif request.param == "llama3.1-medusa-8b-hf_v0.1":
        base_model_root_for_medusa = os.path.join(models_root,
                                                  "llama3.1-medusa-8b-hf_v0.1")
        medusa_heads_model_root = base_model_root_for_medusa
    assert os.path.exists(
        base_model_root_for_medusa
    ), f"Medusa base model path {base_model_root_for_medusa} does not exist under NFS LLM_MODELS_ROOT dir"
    assert os.path.exists(
        medusa_heads_model_root
    ), f"Medusa heads model path {medusa_heads_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
    return base_model_root_for_medusa, medusa_heads_model_root

@pytest.fixture(scope="function")
def lookahead_model_roots(request):
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    base_model_root_for_lookahead = None
    if request.param == "vicuna-7b-v1.3":
        base_model_root_for_lookahead = os.path.join(models_root,
                                                     "vicuna-7b-v1.3")
    assert os.path.exists(
        base_model_root_for_lookahead
    ), f"Lookahead base model path {base_model_root_for_lookahead} does not exist under NFS LLM_MODELS_ROOT dir"
    return base_model_root_for_lookahead

@pytest.fixture(scope="function")
def redrafter_model_roots(request):
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    base_model_root_for_redrafter = None
    redrafter_drafting_model_root = None
    if request.param == "redrafter-vicuna-7b-v1.3":
        base_model_root_for_redrafter = os.path.join(models_root,
                                                     "vicuna-7b-v1.3")
        redrafter_drafting_model_root = os.path.join(
            models_root, "redrafter-vicuna-7b-v1.3")
    assert os.path.exists(
        base_model_root_for_redrafter
    ), f"ReDrafter base model path {base_model_root_for_redrafter} does not exist under NFS LLM_MODELS_ROOT dir"
    assert os.path.exists(
        redrafter_drafting_model_root
    ), f"ReDrafter heads model path {redrafter_drafting_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
    return base_model_root_for_redrafter, redrafter_drafting_model_root

@pytest.fixture(scope="function")
def eagle_model_roots(request):
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    base_model_root_for_eagle = None
    eagle_heads_model_root = None
    if request.param == "EAGLE-Vicuna-7B-v1.3":
        # Test the checkpoint released from HF, which requires two separate weights,
        # one for the base model and one for the EagleNets.
        base_model_root_for_eagle = os.path.join(models_root, "vicuna-7b-v1.3")
        eagle_heads_model_root = os.path.join(models_root,
                                              "EAGLE-Vicuna-7B-v1.3")
        assert os.path.exists(
            base_model_root_for_eagle
        ), f"EAGLE base model path {base_model_root_for_eagle} does not exist under NFS LLM_MODELS_ROOT dir"
        assert os.path.exists(
            eagle_heads_model_root
        ), f"EAGLE heads model path {eagle_heads_model_root} does not exist under NFS LLM_MODELS_ROOT dir"
        return base_model_root_for_eagle, eagle_heads_model_root

    elif request.param == "llama3.1-eagle-8b-hf_v0.5":
        # Test the checkpoint released from ModelOpt, which only requires one weight,
        # which includes both the base model and EagleNets, and is an FP8 datatype.
        modelopt_checkpoint_root_for_eagle = os.path.join(
            models_root, "modelopt-hf-model-hub", "llama3.1-eagle-8b-hf_v0.5")
        assert os.path.exists(
            modelopt_checkpoint_root_for_eagle
        ), f"EAGLE ModelOpt checkpoint path {modelopt_checkpoint_root_for_eagle} does not exist under NFS LLM_MODELS_ROOT dir"
        return modelopt_checkpoint_root_for_eagle
    else:
        assert False, "Error Eagle weight's name"

@pytest.fixture(scope="function")
def mamba_model_root(request):
    "get mamba model data"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"

    mamba_model_root = os.path.join(models_root, 'mamba', "mamba-130m-hf")
    if hasattr(request, "param"):
        if request.param == "mamba-2.8b":
            mamba_model_root = os.path.join(models_root, 'mamba',
                                            "mamba-2.8b-hf")
        elif request.param == "mamba-130m":
            mamba_model_root = os.path.join(models_root, 'mamba',
                                            "mamba-130m-hf")
        elif request.param == "mamba-1.4b":
            mamba_model_root = os.path.join(models_root, 'mamba',
                                            "mamba-1.4b-hf")
        elif request.param == "mamba-790m":
            mamba_model_root = os.path.join(models_root, 'mamba',
                                            "mamba-790m-hf")
        elif request.param == "mamba-370m":
            mamba_model_root = os.path.join(models_root, 'mamba',
                                            "mamba-370m-hf")
        elif request.param == "mamba2-2.7b":
            mamba_model_root = os.path.join(models_root, 'mamba2',
                                            "mamba2-2.7b")
        elif request.param == "mamba2-1.3b":
            mamba_model_root = os.path.join(models_root, 'mamba2',
                                            "mamba2-1.3b")
        elif request.param == "mamba2-780m":
            mamba_model_root = os.path.join(models_root, 'mamba2',
                                            "mamba2-780m")
        elif request.param == "mamba2-370m":
            mamba_model_root = os.path.join(models_root, 'mamba2',
                                            "mamba2-370m")
        elif request.param == "mamba2-130m":
            mamba_model_root = os.path.join(models_root, 'mamba2',
                                            "mamba2-130m")
        elif request.param == "mamba-codestral-7B-v0.1":
            mamba_model_root = os.path.join(models_root, 'mamba2',
                                            "mamba-codestral-7B-v0.1")

    assert exists(mamba_model_root), f"{mamba_model_root} does not exist!"

    return mamba_model_root

@pytest.fixture(scope="function")
def recurrentgemma_model_root(request):
    "get recurrentgemma model data"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    assert hasattr(request, "param"), "Param is missing!"

    if request.param == "recurrentgemma-2b":
        recurrentgemma_model_root = os.path.join(models_root, "recurrentgemma",
                                                 "recurrentgemma-2b")
    elif request.param == "recurrentgemma-2b-it":
        recurrentgemma_model_root = os.path.join(models_root, "recurrentgemma",
                                                 "recurrentgemma-2b-it")
    elif request.param == "recurrentgemma-2b-flax":
        recurrentgemma_model_root = os.path.join(models_root, "recurrentgemma",
                                                 "recurrentgemma-2b-flax", "2b")
    elif request.param == "recurrentgemma-2b-it-flax":
        recurrentgemma_model_root = os.path.join(models_root, "recurrentgemma",
                                                 "recurrentgemma-2b-it-flax",
                                                 "2b-it")

    assert exists(recurrentgemma_model_root
                  ), f"{recurrentgemma_model_root} does not exist!"

    return recurrentgemma_model_root

@pytest.fixture(scope="function")
def nemotron_nas_model_root(request):
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    assert hasattr(request, "param"), "Param is missing!"

    nemotron_nas_model_root = os.path.join(models_root, "nemotron-nas",
                                           request.param)

    assert exists(
        nemotron_nas_model_root), f"{nemotron_nas_model_root} doesn't exist!"

    return nemotron_nas_model_root

@pytest.fixture(scope="function")
def llm_lora_model_root(request):
    "get lora model path"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    assert hasattr(request, "param"), "Param is missing!"
    model_list = []
    model_root_list = []
    if isinstance(request.param, tuple):
        model_list = list(request.param)
    else:
        model_list = [request.param]

    for item in model_list:
        if item == "chinese-llama-2-lora-13b":
            model_root_list.append(
                os.path.join(models_root, "llama-models-v2",
                             "chinese-llama-2-lora-13b"))
        elif item == "Japanese-Alpaca-LoRA-7b-v0":
            model_root_list.append(
                os.path.join(models_root, "llama-models",
                             "Japanese-Alpaca-LoRA-7b-v0"))
        elif item == "luotuo-lora-7b-0.1":
            model_root_list.append(
                os.path.join(models_root, "llama-models", "luotuo-lora-7b-0.1"))
        elif item == "Ko-QWEN-7B-Chat-LoRA":
            model_root_list.append(
                os.path.join(models_root, "Ko-QWEN-7B-Chat-LoRA"))
        elif item == "Qwen1.5-7B-Chat-750Mb-lora":
            model_root_list.append(
                os.path.join(models_root, "Qwen1.5-7B-Chat-750Mb-lora"))
        elif item == "Upcycled-Qwen1.5-MoE2.7B-LoRA":
            model_root_list.append(
                os.path.join(models_root, "Upcycled-Qwen1.5-MoE2.7B-LoRA"))
        elif item == "Phi-3-mini-4k-instruct-ru-lora":
            model_root_list.append(
                os.path.join(models_root, "lora", "phi",
                             "Phi-3-mini-4k-instruct-ru-lora"))
        elif item == "peft-lora-starcoder2-15b-unity-copilot":
            model_root_list.append(
                os.path.join(models_root, "lora", "starcoder",
                             "peft-lora-starcoder2-15b-unity-copilot"))
        elif item == "chinese-mixtral-lora":
            model_root_list.append(
                os.path.join(models_root, "chinese-mixtral-lora"))
        elif item == "komt-mistral-7b-v1-lora":
            model_root_list.append(
                os.path.join(models_root, "komt-mistral-7b-v1-lora"))

    return ",".join(model_root_list)

@pytest.fixture(scope="function")
def llm_dora_model_root(request):
    "get dora model path"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    assert hasattr(request, "param"), "Param is missing!"
    model_list = []
    model_root_list = []
    if isinstance(request.param, tuple):
        model_list = list(request.param)
    else:
        model_list = [request.param]

    for item in model_list:
        if item == "commonsense-llama-v3-8b-dora-r32":
            model_root_list.append(
                os.path.join(models_root, "llama-models-v3", "DoRA-weights",
                             "llama_dora_commonsense_checkpoints", "LLama3-8B",
                             "dora_r32"))

    return ",".join(model_root_list)

@pytest.fixture(scope="function")
def llm_mistral_model_root(request):
    "get mistral model path"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    model_root = os.path.join(models_root, "mistral-7b-v0.1")
    if request.param == "mistral-7b-v0.1":
        model_root = os.path.join(models_root, "mistral-7b-v0.1")
    if request.param == "komt-mistral-7b-v1":
        model_root = os.path.join(models_root, "komt-mistral-7b-v1")
    if request.param == "mistral-7b-v0.3":
        model_root = os.path.join(models_root, "Mistral-7B-Instruct-v0.3")

    return model_root


@pytest.fixture(scope="function")
def llm_mixtral_model_root(request):
    "get mixtral model path"
    models_root = llm_models_root()
    model_root = os.path.join(models_root, "Mixtral-8x7B-v0.1")
    assert models_root, "Did you set LLM_MODELS_ROOT?"
    if request.param == "Mixtral-8x7B-v0.1":
        model_root = os.path.join(models_root, "Mixtral-8x7B-v0.1")
    if request.param == "Mixtral-8x22B-v0.1":
        model_root = os.path.join(models_root, "Mixtral-8x22B-v0.1")
    if request.param == "Mixtral-8x7B-Instruct-v0.1":
        model_root = os.path.join(models_root, "Mixtral-8x7B-Instruct-v0.1")

    return model_root

@pytest.fixture(scope="module")
@cached_in_llm_models_root("mathstral-7B-v0.1", True)
def llm_mathstral_model_root(llm_venv):
    "return mathstral-7B-v0.1 model root"

    workspace = llm_venv.get_working_directory()
    long_mathstral_model_root = os.path.join(workspace, "mathstral-7B-v0.1")

    return long_mathstral_model_root


@pytest.fixture(scope="module")
@cached_in_llm_models_root("LongAlpaca-7B", True)
def llm_long_alpaca_model_root(llm_venv):
    "return long alpaca model root"

    workspace = llm_venv.get_working_directory()
    long_alpaca_model_root = os.path.join(workspace, "LongAlpaca-7B")

    return long_alpaca_model_root


@pytest.fixture(scope="module")
@cached_in_llm_models_root("gpt-neox-20b", True)
def llm_gptneox_model_root(llm_venv):
    "return gptneox model root"

    workspace = llm_venv.get_working_directory()
    gptneox_model_root = os.path.join(workspace, "gpt-neox-20b")

    return gptneox_model_root

@pytest.fixture(scope="function")
def llm_phi_model_root(request):
    "return phi model root"
    models_root = llm_models_root()
    assert models_root, "Did you set LLM_MODELS_ROOT?"

    if 'Phi-3.5' in request.param:
        phi_model_root = os.path.join(models_root, 'Phi-3.5/' + request.param)
    elif 'Phi-3' in request.param:
        phi_model_root = os.path.join(models_root, 'Phi-3/' + request.param)
    else:
        phi_model_root = os.path.join(models_root, request.param)

    assert os.path.exists(
        phi_model_root
    ), f"{phi_model_root} does not exist under NFS LLM_MODELS_ROOT dir"

    return phi_model_root

@pytest.fixture(scope="module")
@cached_in_llm_models_root("falcon-180b", True)
def llm_falcon_180b_model_root():
    "prepare falcon 180b model & return falcon model root"
    raise RuntimeError("falcon 180b must be cached")


@pytest.fixture(scope="module")
@cached_in_llm_models_root("falcon-11B", True)
def llm_falcon_11b_model_root(llm_venv):
    "prepare falcon-11B model & return falcon model root"
    workspace = llm_venv.get_working_directory()
    model_root = os.path.join(workspace, "falcon-11B")

    call(f"git clone https://huggingface.co/tiiuae/falcon-11B {model_root}",
         shell=True)

    return model_root


@pytest.fixture(scope="module")
@cached_in_llm_models_root("email_composition", True)
def llm_gpt2_next_8b_model_root():
    raise RuntimeError("gpt-next 8b must be cached")

@pytest.fixture(scope="function")
def llm_glm_4_9b_model_root(request):
    "prepare glm-4-9b model & return model path"
    model_name = request.param
    models_root = llm_models_root()
    if model_name == "glm-4-9b":
        model_root = os.path.join(models_root, "glm-4-9b")
    elif model_name == "glm-4-9b-chat":
        model_root = os.path.join(models_root, "glm-4-9b-chat")
    elif model_name == "glm-4-9b-chat-1m":
        model_root = os.path.join(models_root, "glm-4-9b-chat-1m")
    elif model_name == "glm-4v-9b":
        model_root = os.path.join(models_root, "glm-4v-9b")

    return model_root

@pytest.fixture(scope="module")
@cached_in_llm_models_root("internlm-chat-7b", True)
def llm_internlm_7b_model_root(llm_venv):
    "prepare internlm 7b model"
    workspace = llm_venv.get_working_directory()
    model_root = os.path.join(workspace, "internlm-chat-7b")

    call(
        f"git clone https://huggingface.co/internlm/internlm-chat-7b {model_root}",
        shell=True)

    return model_root


@pytest.fixture(scope="module")
@cached_in_llm_models_root("internlm2-7b", True)
def llm_internlm2_7b_model_root(llm_venv):
    "prepare internlm2 7b model"
    workspace = llm_venv.get_working_directory()
    model_root = os.path.join(workspace, "internlm2-7b")

    call(f"git clone https://huggingface.co/internlm/internlm2-7b {model_root}",
         shell=True)

    return model_root


@pytest.fixture(scope="module")
@cached_in_llm_models_root("internlm-chat-20b", True)
def llm_internlm_20b_model_root(llm_venv):
    "prepare internlm 20b model"
    workspace = llm_venv.get_working_directory()
    model_root = os.path.join(workspace, "internlm-chat-20b")

    call(
        f"git clone https://huggingface.co/internlm/internlm-chat-20b {model_root}",
        shell=True)

    return model_root

@pytest.fixture(scope="module")
@cached_in_llm_models_root("Qwen-7B-Chat", True)
def llm_qwen_7b_model_root(llm_venv):
    "prepare qwen-7b model & return model path"
    workspace = llm_venv.get_working_directory()
    model_root = os.path.join(workspace, "Qwen-7B-Chat")

    return model_root

@pytest.fixture(scope="function")
|
|
def llm_qwen_model_root(request, llm_venv):
|
|
"prepare qwen model & return model path"
|
|
models_root = llm_models_root()
|
|
assert models_root, "Did you set LLM_MODELS_ROOT?"
|
|
|
|
qwen_model_root = os.path.join(models_root, "Qwen-7B-Chat")
|
|
|
|
if hasattr(request, "param"):
|
|
if request.param == "qwen_7b_chat":
|
|
qwen_model_root = os.path.join(models_root, "Qwen-7B-Chat")
|
|
elif request.param == "qwen_14b_chat":
|
|
qwen_model_root = os.path.join(models_root, "Qwen-14B-Chat")
|
|
elif request.param == "qwen_72b_chat":
|
|
qwen_model_root = os.path.join(models_root, "Qwen-72B-Chat")
|
|
elif request.param == "qwen_7b_chat_int4":
|
|
qwen_model_root = os.path.join(models_root, "Qwen-7B-Chat-Int4")
|
|
elif request.param == "qwen-vl-chat":
|
|
qwen_model_root = os.path.join(models_root, "Qwen-VL-Chat")
|
|
elif request.param == "qwen1.5_7b_chat_awq":
|
|
qwen_model_root = os.path.join(models_root, "Qwen1.5-7B-Chat-AWQ")
|
|
elif request.param == "qwen1.5_0.5b_chat":
|
|
qwen_model_root = os.path.join(models_root, "Qwen1.5-0.5B-Chat")
|
|
elif request.param == "qwen1.5_7b_chat":
|
|
qwen_model_root = os.path.join(models_root, "Qwen1.5-7B-Chat")
|
|
elif request.param == "qwen1.5_14b_chat":
|
|
qwen_model_root = os.path.join(models_root, "Qwen1.5-14B-Chat")
|
|
elif request.param == "qwen1.5_moe_a2.7b_chat":
|
|
qwen_model_root = os.path.join(models_root,
|
|
"Qwen1.5-MoE-A2.7B-Chat")
|
|
elif request.param == "qwen1.5_72b_chat":
|
|
qwen_model_root = os.path.join(models_root, "Qwen1.5-72B-Chat")
|
|
elif request.param == "qwen1.5_moe_a2.7b_chat":
|
|
qwen_model_root = os.path.join(models_root,
|
|
"Qwen1.5-MoE-A2.7B-Chat")
|
|
elif request.param == "qwen1.5_14b_chat_int4":
|
|
qwen_model_root = os.path.join(models_root,
|
|
"Qwen1.5-14B-Chat-GPTQ-Int4")
|
|
elif request.param == "qwen2_0.5b_instruct":
|
|
qwen_model_root = os.path.join(models_root, "Qwen2-0.5B-Instruct")
|
|
elif request.param == "qwen2_7b_instruct":
|
|
qwen_model_root = os.path.join(models_root, "Qwen2-7B-Instruct")
|
|
elif request.param == "qwen2_7b_awq":
|
|
qwen_model_root = os.path.join(models_root, "Qwen2-7B-Instruct-AWQ")
|
|
elif request.param == "qwen2_57b_a14b":
|
|
qwen_model_root = os.path.join(models_root, "Qwen2-57B-A14B")
|
|
elif request.param == "qwen2_72b_instruct":
|
|
qwen_model_root = os.path.join(models_root, "Qwen2-72B-Instruct")
|
|
elif request.param == "qwen2_vl_7b_instruct":
|
|
qwen_model_root = os.path.join(models_root, "Qwen2-VL-7B-Instruct")
|
|
elif request.param == "qwen2_audio_7b_instruct":
|
|
qwen_model_root = os.path.join(models_root,
|
|
"Qwen2-Audio-7B-Instruct")
|
|
elif request.param == "qwen2.5_0.5b_instruct":
|
|
qwen_model_root = os.path.join(models_root, "Qwen2.5-0.5B-Instruct")
|
|
elif request.param == "qwen2.5_1.5b_instruct":
|
|
qwen_model_root = os.path.join(models_root, "Qwen2.5-1.5B-Instruct")
|
|
elif request.param == "qwen2.5_7b_instruct":
|
|
qwen_model_root = os.path.join(models_root, "Qwen2.5-7B-Instruct")
|
|
elif request.param == "qwen2.5_14b_instruct_int4":
|
|
qwen_model_root = os.path.join(models_root,
|
|
"Qwen2.5-14B-Instruct-GPTQ-Int4")
|
|
elif request.param == "qwen2.5_72b_instruct":
|
|
qwen_model_root = os.path.join(models_root, "Qwen2.5-72B-Instruct")
|
|
|
|
assert exists(qwen_model_root), f"{qwen_model_root} does not exist!"
|
|
|
|
return qwen_model_root
|
|
|
|
|
|
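# Illustrative usage of the `llm_qwen_model_root` fixture above (a sketch, not
# an actual test in this file; the test name is hypothetical). The fixture is
# parametrized indirectly, and request.param selects which checkpoint directory
# under LLM_MODELS_ROOT is returned:
#
# @pytest.mark.parametrize("llm_qwen_model_root", ["qwen2.5_7b_instruct"],
#                          indirect=True)
# def test_qwen_example(llm_qwen_model_root):
#     assert exists(llm_qwen_model_root)

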
@pytest.fixture(scope="function")
|
|
def llm_granite_model_root(request):
|
|
models_root = llm_models_root()
|
|
model_name = request.param
|
|
granite_model_root = os.path.join(models_root, model_name)
|
|
assert exists(granite_model_root), f"{granite_model_root} does not exist!"
|
|
return granite_model_root
|
|
|
|
|
|
@pytest.fixture(scope="session")
|
|
@cached_in_llm_models_root("nemotron/Nemotron-3-8B-Base-4k.nemo", True)
|
|
def llm_nemotron_3_8b_model_root():
|
|
"get nemotron/Nemotron-3-8B-Base-4k.nemo"
|
|
raise RuntimeError("nemotron/Nemotron-3-8B-Base-4k.nemo must be cached")
|
|
|
|
|
|
@pytest.fixture(scope="session")
|
|
@cached_in_llm_models_root("nemotron/Nemotron-4-15B-Base.nemo", True)
|
|
def llm_nemotron_4_15b_model_root():
|
|
"get nemotron/Nemotron-4-15B-Base.nemo"
|
|
raise RuntimeError("nemotron/Nemotron-4-15B-Base.nemo must be cached")
|
|
|
|
|
|
@pytest.fixture(scope="session")
|
|
def mmlu_dataset_root():
|
|
models_root = llm_models_root()
|
|
assert models_root, "Did you set LLM_MODELS_ROOT?"
|
|
|
|
mmlu_dataset_root = os.path.join(models_root, "datasets", "mmlu")
|
|
|
|
assert os.path.exists(
|
|
mmlu_dataset_root
|
|
), f"{mmlu_dataset_root} does not exist under NFS LLM_MODELS_ROOT dir"
|
|
return mmlu_dataset_root
|
|
|
|
|
|
@pytest.fixture(scope="function")
|
|
def deepseek_model_root(request):
|
|
"get deepseek model"
|
|
models_root = llm_models_root()
|
|
assert models_root, "Did you set LLM_MODELS_ROOT?"
|
|
if request.param == "deepseek-coder-6.7b-instruct":
|
|
model_root = os.path.join(models_root, "deepseek-coder-6.7b-instruct")
|
|
|
|
return model_root
|
|
|
|
|
|
@pytest.fixture(scope="module")
|
|
def llm_commandr_v01_model_root(llm_venv):
|
|
"prepare command-r model & return model path"
|
|
models_root = llm_models_root()
|
|
model_root = os.path.join(models_root, "c4ai-command-r-v01")
|
|
|
|
return model_root
|
|
|
|
|
|
@pytest.fixture(scope="module")
|
|
def llm_commandr_plus_model_root(llm_venv):
|
|
"prepare command-r-plus model & return model path"
|
|
models_root = llm_models_root()
|
|
model_root = os.path.join(models_root, "c4ai-command-r-plus")
|
|
|
|
return model_root
|
|
|
|
|
|
@pytest.fixture(scope="module")
|
|
def llm_aya_23_8b_model_root(llm_venv):
|
|
"prepare Aya-23-8B model & return model path"
|
|
models_root = llm_models_root()
|
|
model_root = os.path.join(models_root, "aya-23-8B")
|
|
|
|
return model_root
|
|
|
|
|
|
@pytest.fixture(scope="module")
|
|
def llm_aya_23_35b_model_root(llm_venv):
|
|
"prepare Aya-23-35B model & return model path"
|
|
models_root = llm_models_root()
|
|
model_root = os.path.join(models_root, "aya-23-35B")
|
|
|
|
return model_root
|
|
|
|
|
|
@pytest.fixture(scope="function")
|
|
def engine_dir(llm_venv, capfd):
|
|
"Get engine dir"
|
|
engine_path = os.path.join(llm_venv.get_working_directory(), "engines")
|
|
print_storage_usage(llm_venv.get_working_directory(),
|
|
"before removing existing engines", capfd)
|
|
# clean the engine dir for each case.
|
|
cur_time = time.time()
|
|
expire = time.time() + 60
|
|
while exists(engine_path) and cur_time < expire:
|
|
shutil.rmtree(engine_path, ignore_errors=True)
|
|
time.sleep(2)
|
|
cur_time = time.time()
|
|
|
|
print_storage_usage(llm_venv.get_working_directory(),
|
|
"after removing existing engines", capfd)
|
|
return engine_path
|
|
|
|
|
|
@pytest.fixture(scope="function")
|
|
def cmodel_dir(llm_venv):
|
|
"converted model dir"
|
|
model_dir = os.path.join(llm_venv.get_working_directory(), "cmodels")
|
|
|
|
yield model_dir
|
|
|
|
if exists(model_dir):
|
|
shutil.rmtree(model_dir)
|
|
|
|
|
|
@pytest.fixture(scope="module")
|
|
def qcache_dir(llm_venv, llm_root):
|
|
"get quantization cache dir"
|
|
defs.ci_profiler.start("qcache_dir")
|
|
|
|
cache_dir = os.path.join(llm_venv.get_working_directory(), "qcache")
|
|
|
|
quantization_root = os.path.join(llm_root, "examples", "quantization")
|
|
|
|
import platform
|
|
|
|
# Fix the issue that the requirements.txt is not available on aarch64.
|
|
if "aarch64" not in platform.machine() and get_sm_version() >= 89:
|
|
llm_venv.run_cmd([
|
|
"-m", "pip", "install", "-r",
|
|
os.path.join(quantization_root, "requirements.txt")
|
|
])
|
|
|
|
if not exists(cache_dir):
|
|
makedirs(cache_dir)
|
|
|
|
yield cache_dir
|
|
|
|
if exists(cache_dir):
|
|
shutil.rmtree(cache_dir)
|
|
|
|
defs.ci_profiler.stop("qcache_dir")
|
|
print(
|
|
f"qcache_dir: {defs.ci_profiler.elapsed_time_in_sec('qcache_dir')} sec")
|
|
|
|
|
|
@pytest.fixture(scope="module")
|
|
def qcache_dir_without_install_package(llm_venv, llm_root):
|
|
"get quantization cache dir"
|
|
defs.ci_profiler.start("qcache_dir_without_install_package")
|
|
|
|
cache_dir = os.path.join(llm_venv.get_working_directory(), "qcache")
|
|
|
|
if not exists(cache_dir):
|
|
makedirs(cache_dir)
|
|
|
|
yield cache_dir
|
|
|
|
if exists(cache_dir):
|
|
shutil.rmtree(cache_dir)
|
|
|
|
defs.ci_profiler.stop("qcache_dir_without_install_package")
|
|
print(
|
|
f"qcache_dir_without_install_package: {defs.ci_profiler.elapsed_time_in_sec('qcache_dir_without_install_package')} sec"
|
|
)
|
|
|
|
|
|
@pytest.fixture(scope="module")
|
|
def star_attention_input_root(llm_root):
|
|
"Get star attention input file dir"
|
|
star_attention_input_root = unittest_path() / "_torch" / "multi_gpu"
|
|
|
|
return star_attention_input_root
|
|
|
|
|
|
def parametrize_with_ids(argnames: str | Sequence[str],
                         argvalues: Iterable[ParameterSet | Sequence[object]
                                             | object], **kwargs):
    """An alternative to pytest.mark.parametrize with automatically generated test ids."""
    if isinstance(argnames, str):
        argname_list = [n.strip() for n in argnames.split(",")]
    else:
        argname_list = argnames

    case_ids = []
    for case_argvalues in argvalues:
        if isinstance(case_argvalues, ParameterSet):
            case_argvalues = case_argvalues.values
        elif case_argvalues is None or isinstance(case_argvalues,
                                                  (str, float, int, bool)):
            case_argvalues = (case_argvalues, )
        assert len(case_argvalues) == len(argname_list)

        case_id = [
            f"{name}={value}"
            for name, value in zip(argname_list, case_argvalues)
        ]
        case_ids.append("-".join(case_id))

    return pytest.mark.parametrize(argnames, argvalues, ids=case_ids, **kwargs)


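# Example of the ids generated by `parametrize_with_ids` above (illustrative
# only; the parameter names and values are hypothetical):
#
# @parametrize_with_ids("use_fp8,batch_size", [(False, 1), (True, 8)])
# def test_example(use_fp8, batch_size):
#     ...
#
# produces the test ids "use_fp8=False-batch_size=1" and
# "use_fp8=True-batch_size=8".

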
@pytest.fixture(autouse=True)
def skip_by_device_count(request):
    "skip the test if there are fewer devices than required"
    if request.node.get_closest_marker('skip_less_device'):
        device_count = get_device_count()
        expected_count = request.node.get_closest_marker(
            'skip_less_device').args[0]
        if expected_count > int(device_count):
            pytest.skip(
                f'Device count {device_count} is less than {expected_count}')


@pytest.fixture(autouse=True)
def skip_by_device_memory(request):
    "skip the test if there is less device memory than required"
    if request.node.get_closest_marker('skip_less_device_memory'):
        device_memory = get_device_memory()
        expected_memory = request.node.get_closest_marker(
            'skip_less_device_memory').args[0]
        if expected_memory > int(device_memory):
            pytest.skip(
                f'Device memory {device_memory} is less than {expected_memory}')


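# The two autouse fixtures above implement custom markers. Illustrative usage
# (the device count and memory threshold below are arbitrary example values):
#
# @pytest.mark.skip_less_device(4)
# @pytest.mark.skip_less_device_memory(80000)
# def test_needs_four_large_gpus(llm_venv):
#     ...
#
# The first marker argument is compared against get_device_count() /
# get_device_memory() defined later in this module.

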
def get_sm_version():
    "get compute capability"
    prop = torch.cuda.get_device_properties(0)
    return prop.major * 10 + prop.minor


skip_pre_ada = pytest.mark.skipif(
    get_sm_version() < 89,
    reason="This test is not supported in pre-Ada architecture")

skip_pre_hopper = pytest.mark.skipif(
    get_sm_version() < 90,
    reason="This test is not supported in pre-Hopper architecture")

skip_pre_blackwell = pytest.mark.skipif(
    get_sm_version() < 100,
    reason="This test is not supported in pre-Blackwell architecture")

skip_post_blackwell = pytest.mark.skipif(
    get_sm_version() >= 100,
    reason="This test is not supported in post-Blackwell architecture")

skip_no_nvls = pytest.mark.skipif(not ipc_nvls_supported(),
                                  reason="NVLS is not supported")
skip_no_hopper = pytest.mark.skipif(
    get_sm_version() != 90,
    reason="This test is only supported in Hopper architecture")

skip_no_sm120 = pytest.mark.skipif(get_sm_version() != 120,
                                   reason="This test is for Blackwell SM120")


def skip_fp8_pre_ada(use_fp8):
    "skip fp8 tests if the SM version is less than 8.9"
    if use_fp8 and get_sm_version() < 89:
        pytest.skip("FP8 is not supported on pre-Ada architectures")


def skip_fp4_pre_blackwell(use_fp4):
    "skip fp4 tests if the SM version is less than 10.0 or greater than or equal to 12.0"
    if use_fp4 and (get_sm_version() < 100 or get_sm_version() >= 120):
        pytest.skip("FP4 is only supported on Blackwell SM 10x architectures")


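# Illustrative usage of the skip helpers above (the test shapes are
# hypothetical): the module-level markers are applied as decorators, while the
# fp8/fp4 helpers are called inside a test body once the parametrized flag is
# known.
#
# @skip_pre_hopper
# def test_hopper_only_feature(llm_venv):
#     ...
#
# def test_quantized(use_fp8):
#     skip_fp8_pre_ada(use_fp8)
#     ...

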
@pytest.fixture(autouse=True)
def skip_device_not_contain(request):
    "skip the test if the device name does not contain any of the given keywords"
    if request.node.get_closest_marker('skip_device_not_contain'):
        keyword_list = request.node.get_closest_marker(
            'skip_device_not_contain').args[0]
        device = get_gpu_device_list()[0]
        if not any(keyword in device for keyword in keyword_list):
            pytest.skip(
                f"Device {device} does not contain keyword in {keyword_list}.")


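# Illustrative usage of the `skip_device_not_contain` marker handled above
# (the keyword list is an example): the test runs only when the name reported
# by `nvidia-smi -L` for GPU 0 contains one of the keywords.
#
# @pytest.mark.skip_device_not_contain(["H100", "H200"])
# def test_hopper_specific(llm_venv):
#     ...

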
def get_gpu_device_list():
    "get device list"
    with tempfile.TemporaryDirectory() as temp_dirname:
        suffix = ".exe" if is_windows() else ""
        # TODO: Use NRSU because we can't assume nvidia-smi across all platforms.
        cmd = " ".join(["nvidia-smi" + suffix, "-L"])
        output = check_output(cmd, shell=True, cwd=temp_dirname)
        return [l.strip() for l in output.strip().split("\n")]


def get_device_count():
    "return device count"
    return len(get_gpu_device_list())


def get_device_memory():
    "get total memory (MiB) of the first GPU"
    memory = 0
    with tempfile.TemporaryDirectory() as temp_dirname:
        suffix = ".exe" if is_windows() else ""
        # TODO: Use NRSU because we can't assume nvidia-smi across all platforms.
        cmd = " ".join([
            "nvidia-smi" + suffix, "--query-gpu=memory.total",
            "--format=csv,noheader"
        ])
        output = check_output(cmd, shell=True, cwd=temp_dirname)
        memory = int(output.strip().split()[0])

    return memory


def pytest_addoption(parser):
    parser.addoption(
        "--test-list",
        "-F",
        action="store",
        default=None,
        help="Path to the file containing the list of tests to run")
    parser.addoption(
        "--workspace",
        "--ws",
        action="store",
        default=None,
        help="Workspace path to store temp data generated during the tests")
    parser.addoption(
        "--waives-file",
        "-S",
        action="store",
        default=None,
        help=
        "Specify a file containing a list of waives, one per line. After filtering collected tests, Pytest will "
        "apply the waive state specified by this file to the set of tests to be run."
    )
    parser.addoption(
        "--output-dir",
        "-O",
        action="store",
        default=None,
        help=
        "Directory to store test output. Should point to a new or existing empty directory."
    )
    parser.addoption(
        "--test-prefix",
        "-P",
        action="store",
        default=None,
        help=
        "Prefix prepended to test names; useful for mapping waive lists to a specific GPU, such as 'GH200'."
    )
    parser.addoption("--regexp",
                     "-R",
                     action='store',
                     default=None,
                     help="A regexp to specify which tests to run")
    parser.addoption(
        "--apply-test-list-correction",
        "-C",
        action='store_true',
        help=
        "Attempt to automatically correct invalid test names in filter files and print the corrected name to the "
        "terminal. If the correct name cannot be determined, the invalid test name is printed instead."
    )
    parser.addoption("--perf",
                     action="store_true",
                     help="Run perf tests")
    parser.addoption(
        "--perf-log-formats",
        help=
        "Perf log format: 'yaml' or 'csv'. Pass the flag multiple times to emit multiple formats.",
        action="append",
        default=[])


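# Example invocation combining the options registered above (a sketch; the
# target directory, file names and output path are hypothetical):
#
#   pytest tests/integration/defs \
#       --test-list my_test_list.txt \
#       --waives-file my_waives.txt \
#       --output-dir /tmp/test-output \
#       --test-prefix GH200 \
#       --perf --perf-log-formats csv --perf-log-formats yaml

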
@pytest.hookimpl(trylast=True)
def pytest_generate_tests(metafunc: pytest.Metafunc):
    if metafunc.definition.function.__name__ != 'test_unittests_v2':
        return
    testlist_path = metafunc.config.getoption("--test-list")
    if not testlist_path:
        return

    with open(testlist_path, "r") as f:
        lines = f.readlines()
    lines = preprocess_test_list_lines(testlist_path, lines)

    uts = []
    ids = []
    for line in lines:
        if line.startswith("unittest/"):
            if " TIMEOUT " in line:
                # Process the TIMEOUT marker; the value is given in minutes.
                case_part, timeout_part = line.split(" TIMEOUT ", 1)
                case = case_part.strip()
                timeout_str = timeout_part.strip()
                timeout_num_match = re.search(r'\(?(\d+)\)?', timeout_str)
                if timeout_num_match:
                    timeout_min = int(timeout_num_match.group(1))
                    timeout_sec = timeout_min * 60
                else:
                    raise ValueError(
                        f"Invalid TIMEOUT format: {timeout_str} in line: {line}"
                    )
                mark = pytest.mark.timeout(timeout_sec)
                uts.append(pytest.param(case, marks=mark))
                # Keep the timeout information in the generated test id.
                ids.append(f"{case} TIMEOUT {timeout_str}")
            else:
                case = line.strip()
                uts.append(case)
                ids.append(case)
    metafunc.parametrize("case", uts, ids=ids)


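# Example of the test-list lines consumed above (illustrative; the unittest
# paths are hypothetical). A trailing "TIMEOUT (<minutes>)" attaches a
# pytest.mark.timeout to the generated case:
#
#   unittest/trt/test_example.py::test_case
#   unittest/trt/test_example.py::test_slow_case TIMEOUT (90)

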
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_collection_modifyitems(session, config, items):
    testlist_path = config.getoption("--test-list")
    waives_file = config.getoption("--waives-file")
    test_prefix = config.getoption("--test-prefix")
    perf_test = config.getoption("--perf")

    if perf_test:
        global ALL_PYTEST_ITEMS
        ALL_PYTEST_ITEMS = None

        import copy

        # Do not import at global level since that would create cyclic imports.
        from .perf.test_perf import generate_perf_tests

        # Perf tests are generated based on the test list to speed up the test collection time.
        items = generate_perf_tests(session, config, items)

        ALL_PYTEST_ITEMS = copy.copy(items)

    if test_prefix:
        # Override the internal nodeid of each item to contain the correct test prefix.
        # This is needed for reporting to correctly process the test name in order to
        # bucket it into the appropriate test suite.
        for item in items:
            item._nodeid = "{}/{}".format(test_prefix, item._nodeid)

    regexp = config.getoption("--regexp")

    if testlist_path:
        modify_by_test_list(testlist_path, items, config)

    if regexp is not None:
        deselect_by_regex(regexp, items, test_prefix, config)

    if waives_file:
        apply_waives(waives_file, items, config)

    # Temporarily remove the prefix before splitting the test list, then
    # restore the test ids after the split.
    for item in items:
        if test_prefix and item._nodeid.startswith(f"{test_prefix}/"):
            item._nodeid = item._nodeid[len(f"{test_prefix}/"):]
    yield
    for item in items:
        if test_prefix:
            item._nodeid = f"{test_prefix}/{item._nodeid}"


def pytest_configure(config):
    # avoid thread leak of tqdm's TMonitor
    tqdm.tqdm.monitor_interval = 0


def deselect_by_regex(regexp, items, test_prefix, config):
    """Deselect tests that do not match the given regular expression.

    A test is kept if it matches any of the compiled expressions."""
    compiled_regexes = []
    regex_list = []
    r = re.compile(regexp)
    compiled_regexes.append(r)
    regex_list.append(regexp)

    selected = []
    deselected = []

    corrections = get_test_name_corrections_v2(set(regex_list),
                                               set(it.nodeid for it in items),
                                               TestCorrectionMode.REGEX)
    handle_corrections(corrections, test_prefix)

    for item in items:
        found = False
        for regex in compiled_regexes:
            if regex.search(item.nodeid):
                found = True
                break
        if found:
            selected.append(item)
        else:
            deselected.append(item)

    if deselected:
        config.hook.pytest_deselected(items=deselected)
        items[:] = selected


@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item, call):
    outcome = yield
    report = outcome.get_result()

    if call.when == "call":
        report.file = str(item.fspath)
        report.line = str(item.location[1])
        report.url = ""


@pytest.fixture(scope="session")
def all_pytest_items():
    """
    Provides all pytest items available in the current test definitions, before any
    filtering has been applied.
    """
    return ALL_PYTEST_ITEMS


@pytest.fixture(scope="session")
def test_root():
    return os.path.dirname(os.path.dirname(__file__))


@pytest.fixture(scope="function")
|
|
def test_case(request, llm_root):
|
|
"get test case"
|
|
test_cases_file = "tests/integration/defs/test_cases.yml"
|
|
input_file_dir = "tests/integration/test_input_files"
|
|
test_cases_file_path = os.path.join(llm_root, test_cases_file)
|
|
case_name = request.param
|
|
|
|
with open(test_cases_file_path, 'r', encoding='UTF-8') as file:
|
|
test_cases = yaml.safe_load(file)
|
|
|
|
case = test_cases["test_cases"][case_name]
|
|
input_file = case["input_file"]
|
|
|
|
case["input_file"] = os.path.join(llm_root, input_file_dir, input_file)
|
|
|
|
return case
|
|
|
|
|
|
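# Minimal sketch of the structure `test_case` expects in test_cases.yml
# (the case name, input file and extra fields below are hypothetical):
#
# test_cases:
#   my_case_name:
#     input_file: my_input.json
#     # any other per-case fields are passed through to the test unchanged

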
def check_nvlink():
    "check nvlink status"
    with tempfile.TemporaryDirectory() as temp_dirname:
        try:
            suffix = ".exe" if is_windows() else ""
            # TODO: Use NRSU because we can't assume nvidia-smi across all platforms.
            cmd = " ".join(["nvidia-smi" + suffix, "nvlink", "-s", "-i", "0"])
            output = check_output(cmd, shell=True, cwd=temp_dirname)
        except sp.CalledProcessError:
            return False

    if len(output.strip()) == 0:
        return False

    return "inActive" not in output.strip()


skip_nvlink_inactive = pytest.mark.skipif(check_nvlink() is False,
                                          reason="nvlink is inactive.")


@pytest.fixture(scope="function")
|
|
def eval_venv(llm_venv):
|
|
"set UCC_TEAM_IDS_POOL_SIZE=1024"
|
|
|
|
llm_venv._new_env["UCC_TEAM_IDS_POOL_SIZE"] = "1024"
|
|
|
|
yield llm_venv
|
|
|
|
llm_venv._new_env.pop("UCC_TEAM_IDS_POOL_SIZE")
|
|
|
|
|
|
def get_host_total_memory():
|
|
"get host memory Mib"
|
|
memory = psutil.virtual_memory().total
|
|
|
|
return int(memory / 1024 / 1024)
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def skip_by_host_memory(request):
    "skip the test if there is less host memory than required"
    if request.node.get_closest_marker('skip_less_host_memory'):
        host_memory = get_host_total_memory()
        expected_memory = request.node.get_closest_marker(
            'skip_less_host_memory').args[0]
        if expected_memory > int(host_memory):
            pytest.skip(
                f'Host memory {host_memory} is less than {expected_memory}')


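# Illustrative usage of the `skip_less_host_memory` marker handled above (the
# threshold is an example value, in MiB as returned by get_host_total_memory):
#
# @pytest.mark.skip_less_host_memory(250000)
# def test_needs_large_host_memory(llm_venv):
#     ...

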
IS_UNDER_CI_ENV = 'JENKINS_HOME' in os.environ

gpu_warning_threshold = 1024 * 1024 * 1024


def collect_status(item: pytest.Item):
    if not IS_UNDER_CI_ENV:
        return

    import psutil
    import pynvml
    pynvml.nvmlInit()

    handles = {
        idx: pynvml.nvmlDeviceGetHandleByIndex(idx)
        for idx in range(pynvml.nvmlDeviceGetCount())
    }

    deadline = time.perf_counter() + 60  # 1 min
    observed_used = 0
    global gpu_warning_threshold

    while time.perf_counter() < deadline:
        observed_used = max(
            pynvml.nvmlDeviceGetMemoryInfo(device).used
            for device in handles.values())
        if observed_used <= gpu_warning_threshold:
            break
        time.sleep(1)
    else:
        gpu_warning_threshold = max(observed_used, gpu_warning_threshold)
        warnings.warn(
            f"Test {item.name} does not free up GPU memory correctly!")

    gpu_memory = {}
    for idx, device in handles.items():
        total_used = pynvml.nvmlDeviceGetMemoryInfo(device).used // 1024 // 1024
        total = pynvml.nvmlDeviceGetMemoryInfo(device).total // 1024 // 1024
        detail = pynvml.nvmlDeviceGetComputeRunningProcesses(device)
        process = {}

        for entry in detail:
            try:
                p = psutil.Process(entry.pid)
                host_memory_in_mbs = p.memory_full_info().uss // 1024 // 1024
                process[entry.pid] = (entry.usedGpuMemory // 1024 // 1024,
                                      host_memory_in_mbs, p.cmdline())
            except Exception:
                pass

        gpu_memory[idx] = {
            "total_used": total_used,
            "total": total,
            "process": process
        }
    print('\nCurrent memory status:')
    print(gpu_memory)


@pytest.hookimpl(wrapper=True)
def pytest_runtest_protocol(item, nextitem):
    ret = yield
    collect_status(item)
    return ret


@pytest.fixture(scope="function")
|
|
def deterministic_test_root(llm_root, llm_venv):
|
|
"Get deterministic test root"
|
|
deterministic_root = os.path.join(llm_root,
|
|
"tests/integration/defs/deterministic")
|
|
|
|
return deterministic_root
|
|
|
|
|
|
@pytest.fixture(scope="function")
|
|
def disaggregated_test_root(llm_root, llm_venv):
|
|
"Get disaggregated test root"
|
|
disaggregated_root = os.path.join(llm_root,
|
|
"tests/integration/defs/disaggregated")
|
|
|
|
return disaggregated_root
|
|
|
|
|
|
@pytest.fixture(scope="function")
|
|
def tritonserver_test_root(llm_root):
|
|
"Get tritonserver test root"
|
|
tritonserver_root = os.path.join(llm_root,
|
|
"tests/integration/defs/triton_server")
|
|
|
|
return tritonserver_root
|