# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # -*- coding: utf-8 -*- import datetime import json import os import re import shutil import subprocess as sp import tempfile import time import urllib.request from functools import wraps from pathlib import Path import defs.ci_profiler import psutil import pytest import yaml from tensorrt_llm.bindings import ipc_nvls_supported from .perf.gpu_clock_lock import GPUClockLock from .perf.session_data_writer import SessionDataWriter from .test_list_parser import (TestCorrectionMode, apply_waives, get_test_name_corrections_v2, handle_corrections, modify_by_test_list, preprocess_test_list_lines) from .trt_test_alternative import (call, check_output, exists, is_windows, is_wsl, makedirs, print_info, print_warning, wsl_to_win_path) try: from llm import trt_environment except ImportError: trt_environment = None # TODO: turn off this when the nightly storage issue is resolved. DEBUG_CI_STORAGE = os.environ.get("DEBUG_CI_STORAGE", False) GITLAB_API_USER = os.environ.get("GITLAB_API_USER") GITLAB_API_TOKEN = os.environ.get("GITLAB_API_TOKEN") EVALTOOL_REPO_URL = os.environ.get("EVALTOOL_REPO_URL") LLM_GATE_WAY_CLIENT_ID = os.environ.get("LLM_GATE_WAY_CLIENT_ID") LLM_GATE_WAY_TOKEN = os.environ.get("LLM_GATE_WAY_TOKEN") def print_storage_usage(path, tag, capfd): if DEBUG_CI_STORAGE: stat = shutil.disk_usage(path) with capfd.disabled(): print_info( f"\nUsage of {path} {stat} @{tag}, used in GB: {stat.used/(2**30)}" ) def wget(url, out): filename = os.path.basename(url) os.makedirs(out, exist_ok=True) urllib.request.urlretrieve(url, os.path.join(out, filename)) def llm_models_root() -> str: '''return LLM_MODELS_ROOT path if it is set in env, assert when it's set but not a valid path ''' DEFAULT_LLM_MODEL_ROOT = os.path.join("/scratch.trt_llm_data", "llm-models") LLM_MODELS_ROOT = os.environ.get("LLM_MODELS_ROOT", DEFAULT_LLM_MODEL_ROOT) return LLM_MODELS_ROOT def tests_path() -> Path: return (Path(os.path.dirname(__file__)) / "../..").resolve() def unittest_path() -> Path: return tests_path() / "unittest" def integration_path() -> Path: return tests_path() / "integration" def cached_in_llm_models_root(path_relative_to_llm_models_root, fail_if_path_is_invalid=False): ''' Use this decorator to declare a cached path in the LLM_MODELS_ROOT directory. That decorator is intended to be used with pytest.fixture functions which prepare and return a data path for some tests. The cache is only queried when llm_models_root() does not return None, and the cache is skipped otherwise. When the cache is queried, and the specified path does not exist, the function: - Triggers an AssertionFailure when fail_if_path_is_invalid is True, - Ignore the invalid path and fallbacks to calling the fixture otherwise. The purpose of the `fail_if_path_is_invalid` is the following: - If you submit a test and the data is not in the cached NFS LLM_MODELS_ROOT dir yet, you can use `fail_if_path_is_invalid=False` (the default). In that case, the fixture will use the fallback path and ignore the cache miss in the CI. After submitting the data to the cached NFS LLM_MODELS_ROOT dir, your test will automatically pickup the cached data. - If your data is known to always be in the LLM_MODELS_ROOT, and you want to make sure that the test fails loudly when it misses in cache, you should specify fail_if_path_is_invalid=True to force the failure. It is useful for when a cache miss will cause a big performance drop for the CI jobs. Example: If you have a fixture which downloads the SantaCoder repo and returns its path for one SantaCoder test, you can do the following: @pytest.fixture(scope="session") def llm_gpt2_santacoder_model_root(llm_venv): workspace = llm_venv.get_working_directory() gpt2_santacoder_model_root = os.path.join(workspace, "santacoder") call( f"git clone https://huggingface.co/bigcode/santacoder {gpt2_santacoder_model_root}", shell=True) return gpt2_santacoder_model_root At some point, if you decide to cache the SantaCoder in the LLM_MODELS_ROOT, you can decorate the fixture to enforce the test to use the ${LLM_MODELS_ROOT}/santacoder cached directory. You can upload SantaCoder to that location before or after submitting this code since there is a fallback path to clone the repo if it is not found in cache. @pytest.fixture(scope="session") @cached_in_llm_models_root("santacoder") def llm_gpt2_santacoder_model_root(llm_venv): ... keep the original code ''' def wrapper(f): @wraps(f) def decorated(*args, **kwargs): if llm_models_root() is not None: cached_dir = f"{llm_models_root()}/{path_relative_to_llm_models_root}" if os.path.exists(cached_dir): return cached_dir elif fail_if_path_is_invalid: assert False, f"{cached_dir} does not exist, and fail_if_path_is_invalid is True, please check the cache directory" return f(*args, **kwargs) return decorated return wrapper # Fixture about whether the current pipeline is running in TRT environment. @pytest.fixture(scope="session") def is_trt_environment(): return trt_environment is not None # Helper function to get llm_root. Do not define it as a fixture so that this # function can be used during test collection phase. def get_llm_root(trt_config=None, gitlab_token=None): if trt_environment: return trt_environment.setup_tensorrt_llm_repo(trt_config, gitlab_token) llm_repo_root = os.environ.get("LLM_ROOT", None) if llm_repo_root is None: llm_repo_root = os.path.dirname( os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) print_warning( f"The LLM_ROOT env var is not defined! Using {llm_repo_root} as LLM_ROOT." ) return llm_repo_root @pytest.fixture(scope="session") def llm_root(): return get_llm_root() @pytest.fixture(scope="session") def llm_datasets_root() -> str: return os.path.join(llm_models_root(), "datasets") @pytest.fixture(scope="session") def llm_rouge_root() -> str: return os.path.join(llm_models_root(), "rouge") @pytest.fixture(scope="module") def bert_example_root(llm_root): "Get bert example root" example_root = os.path.join(llm_root, "examples", "bert") return example_root @pytest.fixture(scope="module") def enc_dec_example_root(llm_root): "Get encoder-decoder example root" example_root = os.path.join(llm_root, "examples", "enc_dec") return example_root @pytest.fixture(scope="module") def whisper_example_root(llm_root, llm_venv): "Get whisper example root" example_root = os.path.join(llm_root, "examples", "whisper") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="module") def opt_example_root(llm_root, llm_venv): "Get opt example root" example_root = os.path.join(llm_root, "examples", "models", "contrib", "opt") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="module") def llama_example_root(llm_root, llm_venv): "Get llama example root" example_root = os.path.join(llm_root, "examples", "llama") try: llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) except: print("pip install error!") return example_root @pytest.fixture(scope="module") def llmapi_example_root(llm_root, llm_venv): "Get llm api example root" example_root = os.path.join(llm_root, "examples", "llm-api") return example_root @pytest.fixture(scope="module") def disaggregated_example_root(llm_root, llm_venv): "Get disaggregated example root" example_root = os.path.join(llm_root, "examples", "disaggregated") return example_root @pytest.fixture(scope="module") def gemma_example_root(llm_root, llm_venv): "Get gemma example root" example_root = os.path.join(llm_root, "examples", "gemma") # https://nvbugs/4559583 Jax dependency broke the entire pipeline in TRT container # due to the dependency incompatibility with torch, which forced reinstall everything # and caused pipeline to fail. We manually install gemma dependency as a WAR. llm_venv.run_cmd(["-m", "pip", "install", "safetensors~=0.4.1", "nltk"]) # Install Jax because it breaks dependency import platform google_extension = [ "-f", "https://storage.googleapis.com/jax-releases/jax_cuda_releases.html" ] # WAR the new posting of "nvidia-cudnn-cu12~=9.0". # "jax[cuda12_pip]~=0.4.19" specifies "nvidia-cudnn-cu12>=8.9" but actually requires "nvidia-cudnn-cu12~=8.9". if "x86_64" in platform.machine(): llm_venv.run_cmd(["-m", "pip", "install", "nvidia-cudnn-cu12~=8.9"]) if "Windows" in platform.system(): llm_venv.run_cmd([ "-m", "pip", "install", "jax~=0.4.19", "jaxlib~=0.4.19", "--no-deps" ] + google_extension) else: llm_venv.run_cmd([ "-m", "pip", "install", "jax[cuda12_pip]~=0.4.19", "jaxlib[cuda12_pip]~=0.4.19", "--no-deps" ] + google_extension) llm_venv.run_cmd(["-m", "pip", "install", "flax~=0.8.0"]) return example_root @pytest.fixture(scope="function") def gemma_model_root(request): "Get gemma model root" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" if hasattr(request, "param"): gemma_model_root = os.path.join(models_root, f"gemma/{request.param}") assert exists(gemma_model_root), f"{gemma_model_root} does not exist!" return gemma_model_root @pytest.fixture(scope="function") def minitron_model_root(request): "Get minitron model root" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" if hasattr(request, "param"): assert request.param == "4b" minitron_model_root = os.path.join(models_root, "nemotron/Minitron-4B-Base") assert exists(minitron_model_root), f"{minitron_model_root} does not exist!" return minitron_model_root @pytest.fixture(scope="function") def mistral_nemo_model_root(request): "Get Mistral Nemo model root" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" if hasattr(request, "param"): assert request.param == "Mistral-Nemo-12b-Base" mistral_nemo_model_root = os.path.join(models_root, "Mistral-Nemo-Base-2407") assert exists( mistral_nemo_model_root), f"{mistral_nemo_model_root} does not exist!" return mistral_nemo_model_root @pytest.fixture(scope="function") def mistral_nemo_minitron_model_root(request): "Get Mistral Nemo Minitron model root" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" if hasattr(request, "param"): assert request.param == "Mistral-NeMo-Minitron-8B-Instruct" mistral_nemo_minitron_model_root = os.path.join( models_root, "Mistral-NeMo-Minitron-8B-Instruct") assert exists(mistral_nemo_minitron_model_root ), f"{mistral_nemo_minitron_model_root} does not exist!" return mistral_nemo_minitron_model_root @pytest.fixture(scope="module") def gpt_example_root(llm_root, llm_venv): "Get gpt example root" example_root = os.path.join(llm_root, "examples", "gpt") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="module") def gptj_example_root(llm_root, llm_venv): "Get gptj example root" example_root = os.path.join(llm_root, "examples", "models", "contrib", "gptj") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="module") def glm_4_9b_example_root(llm_root, llm_venv): "Get glm-4-9b example root" example_root = os.path.join(llm_root, "examples", "glm-4-9b") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="module") def exaone_example_root(llm_root, llm_venv): "Get EXAONE example root" example_root = os.path.join(llm_root, "examples", "exaone") return example_root @pytest.fixture(scope="function") def llm_exaone_model_root(request) -> str: "Get EXAONE model root" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" exaone_model_root = os.path.join(models_root, "exaone") if hasattr(request, "param"): if request.param == "exaone_3.0_7.8b_instruct": exaone_model_root = os.path.join(models_root, "exaone") elif request.param == "exaone_deep_2.4b": exaone_model_root = os.path.join(models_root, "EXAONE-Deep-2.4B") return exaone_model_root @pytest.fixture(scope="module") def falcon_example_root(llm_root, llm_venv): "Get falcon example root" example_root = os.path.join(llm_root, "examples", "models", "contrib", "falcon") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="session") def plugin_gen_path(llm_root): "Path to the plugin_gen.py script" return os.path.join(llm_root, "tensorrt_llm", "tools", "plugin_gen", "plugin_gen.py") @pytest.fixture(scope="module") def internlm2_example_root(llm_root, llm_venv): "Get internlm2 example root" example_root = os.path.join(llm_root, "examples", "internlm2") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="module") def qwen_example_root(llm_root, llm_venv): "Get qwen example root" example_root = os.path.join(llm_root, "examples", "qwen") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="module") def draft_target_model_example_root(llm_root, llm_venv): "Get Draft-Target-Model example root" example_root = os.path.join(llm_root, "examples", "draft_target_model") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="module") def prompt_lookup_example_root(llm_root, llm_venv): "Get Prompt-Lookup example root" example_root = os.path.join(llm_root, "examples", "prompt_lookup") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="module") def medusa_example_root(llm_root, llm_venv): "Get medusa example root" example_root = os.path.join(llm_root, "examples", "medusa") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="module") def redrafter_example_root(llm_root, llm_venv): "Get ReDrafter example root" example_root = os.path.join(llm_root, "examples", "redrafter") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="module") def eagle_example_root(llm_root, llm_venv): "Get EAGLE example root" example_root = os.path.join(llm_root, "examples", "eagle") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="module") def mamba_example_root(llm_root, llm_venv): "Get mamba example root" example_root = os.path.join(llm_root, "examples", "mamba") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) yield example_root llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(llm_root, "requirements.txt") ]) @pytest.fixture(scope="module") def recurrentgemma_example_root(llm_root, llm_venv): "Get recurrentgemma example root" example_root = os.path.join(llm_root, "examples", "recurrentgemma") # install requirements llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) yield example_root llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(llm_root, "requirements.txt") ]) @pytest.fixture(scope="module") def nemotron_nas_example_root(llm_root, llm_venv): example_root = os.path.join(llm_root, "examples", "nemotron_nas") yield example_root @pytest.fixture(scope="module") def nemotron_example_root(llm_root, llm_venv): "Get nemotron example root" example_root = os.path.join(llm_root, "examples", "nemotron") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="module") def commandr_example_root(llm_root, llm_venv): "Get commandr example root" example_root = os.path.join(llm_root, "examples", "commandr") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="module") def deepseek_v2_example_root(llm_root, llm_venv): "Get deepseek v2 example root" example_root = os.path.join(llm_root, "examples", "models", "contrib", "deepseek_v2") llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(example_root, "requirements.txt") ]) return example_root @pytest.fixture(scope="function") def deepseek_v3_model_root(request): models_root = llm_models_root() if (request.param == "DeepSeek-V3"): deepseek_v3_model_root = os.path.join(models_root, "DeepSeek-V3") elif (request.param == "DeepSeek-V3-Lite-bf16"): deepseek_v3_model_root = os.path.join(models_root, "DeepSeek-V3-Lite", "bf16") elif (request.param == "DeepSeek-V3-Lite-fp8"): deepseek_v3_model_root = os.path.join(models_root, "DeepSeek-V3-Lite", "fp8") elif (request.param == "DeepSeek-V3-Lite-nvfp4_moe_only"): deepseek_v3_model_root = os.path.join(models_root, "DeepSeek-V3-Lite", "nvfp4_moe_only") assert exists( deepseek_v3_model_root), f"{deepseek_v3_model_root} does not exist!" return deepseek_v3_model_root @pytest.fixture(scope="session") def trt_performance_cache_name(): return "performance.cache" @pytest.fixture(scope="session") def trt_performance_cache_fpath(llm_venv, trt_performance_cache_name): workspace = llm_venv.get_working_directory() fpath = os.path.join(workspace, trt_performance_cache_name) if is_wsl(): return wsl_to_win_path(fpath) return fpath # Get the executing perf case name @pytest.fixture(autouse=True) def perf_case_name(request): return request.node.nodeid @pytest.fixture(scope="session") def output_dir(request): output = request.config.getoption("--output-dir") if output: os.makedirs(str(output), exist_ok=True) return output @pytest.fixture(scope="session") def trt_gpu_clock_lock(request): """ Fixture for the GPUClockLock, used to interface with pynvml to get system properties and to lock/monitor GPU clocks. """ gpu_list = get_gpu_device_list() gpu_ids = [gpu.split()[1][:-1] for gpu in gpu_list] # Extract GPU IDs gpu_ids_str = ",".join(gpu_ids) gpu_clock_lock = GPUClockLock( gpu_id=gpu_ids_str, interval_ms=1000.0, ) yield gpu_clock_lock gpu_clock_lock.teardown() @pytest.fixture(scope="session") def llm_session_data_writer(request, trt_gpu_clock_lock, output_dir): """ Fixture for the SessionDataWriter, used to write session data to output directory. """ session_data_writer = SessionDataWriter( log_output_directory=output_dir, output_formats=request.config.getoption("--perf-log-formats"), gpu_clock_lock=trt_gpu_clock_lock, ) yield session_data_writer session_data_writer.teardown() @pytest.fixture(scope="session") def custom_user_workspace(request): return request.config.getoption("--workspace") @pytest.fixture(scope="session") def llm_venv(llm_root, custom_user_workspace): workspace_dir = custom_user_workspace subdir = datetime.datetime.now().strftime("ws-%Y-%m-%d-%H-%M-%S") if workspace_dir is None: workspace_dir = "llm-test-workspace" workspace_dir = os.path.join(workspace_dir, subdir) from defs.local_venv import PythonVenvRunnerImpl return PythonVenvRunnerImpl("", "", "python3", os.path.join(os.getcwd(), workspace_dir)) @pytest.fixture(scope="session") @cached_in_llm_models_root("gpt-next/megatron_converted_843m_tp1_pp1.nemo", True) def gpt_next_root(): "get gpt-next/megatron_converted_843m_tp1_pp1.nemo" raise RuntimeError("megatron_converted_843m_tp1_pp1.nemo must be cached") @pytest.fixture(scope="function") def bert_model_root(hf_bert_model_root): "Get bert model root" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" bert_model_name = hf_bert_model_root bert_model_root = os.path.join(models_root, bert_model_name) assert os.path.exists( bert_model_root ), f"{bert_model_root} does not exist under NFS LLM_MODELS_ROOT dir" return (bert_model_name, bert_model_root) @pytest.fixture(scope="function") def enc_dec_model_root(request): "Get enc-dec model root" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" tllm_model_name = request.param if not "wmt" in tllm_model_name: # HuggingFace root enc_dec_model_root = os.path.join(models_root, tllm_model_name) else: # FairSeq root enc_dec_model_root = os.path.join(models_root, "fairseq-models", tllm_model_name) assert os.path.exists( enc_dec_model_root ), f"{enc_dec_model_root} does not exist under NFS LLM_MODELS_ROOT dir" return (tllm_model_name, enc_dec_model_root) @pytest.fixture(scope="function") def whisper_model_root(request): "Get whisper model root" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" assert request.param in [ "large-v2", "large-v3" ], "whisper only supports large-v2 or large-v3 for now" tllm_model_name = request.param whisper_model_root = os.path.join(models_root, "whisper-models", tllm_model_name) assert os.path.exists( whisper_model_root ), f"{whisper_model_root} does not exist under NFS LLM_MODELS_ROOT dir" return (tllm_model_name, whisper_model_root) @pytest.fixture(scope="function") def whisper_example_audio_file(whisper_model_root): return os.path.join(whisper_model_root[1], "1221-135766-0002.wav") @pytest.fixture(scope="function") def multimodal_model_root(request, llm_venv): "Get multimodal model root" models_root = os.path.join(llm_models_root(), 'multimodals') assert models_root, "Did you set LLM_MODELS_ROOT?" tllm_model_name = request.param if 'VILA' in tllm_model_name: models_root = os.path.join(llm_models_root(), 'vila') if 'cogvlm-chat' in tllm_model_name: models_root = os.path.join(llm_models_root(), 'cogvlm-chat') if 'video-neva' in tllm_model_name: models_root = os.path.join(llm_models_root(), 'video-neva') tllm_model_name = tllm_model_name + ".nemo" if 'neva-22b' in tllm_model_name: models_root = os.path.join(llm_models_root(), 'neva') tllm_model_name = tllm_model_name + ".nemo" elif 'Llama-3.2' in tllm_model_name: models_root = os.path.join(llm_models_root(), 'llama-3.2-models') multimodal_model_root = os.path.join(models_root, tllm_model_name) if 'llava-onevision' in tllm_model_name and 'video' in tllm_model_name: multimodal_model_root = multimodal_model_root[:-6] elif 'llava-v1.6' in tllm_model_name and 'vision-trtllm' in tllm_model_name: multimodal_model_root = multimodal_model_root[:-14] assert os.path.exists( multimodal_model_root ), f"{multimodal_model_root} does not exist under NFS LLM_MODELS_ROOT dir" yield (tllm_model_name, multimodal_model_root) if 'llava-onevision' in tllm_model_name: llm_venv.run_cmd(['-m', 'pip', 'uninstall', 'llava', '-y']) @pytest.fixture(scope="function") def update_transformers(llm_venv, llm_root): yield llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(llm_root, "requirements.txt") ]) def remove_file(fn): if os.path.isfile(fn) or os.path.islink(fn): os.remove(fn) @pytest.fixture(scope="module") @cached_in_llm_models_root("replit-code-v1_5-3b", True) def llm_replit_code_v1_5_3b_model_root(): "Get replit-code-v1_5-3b model root" raise RuntimeError("replit-code-v1_5-3b must be cached") @pytest.fixture(scope="module") @cached_in_llm_models_root("gpt2", True) def llm_gpt2_model_root(): "Get gpt2 model root" raise RuntimeError("gpt2 must be cached") @pytest.fixture(scope="module") @cached_in_llm_models_root("gpt2-medium", True) def llm_gpt2_medium_model_root(): "Get gpt2 medium model root" raise RuntimeError("gpt2-medium must be cached") @pytest.fixture(scope="module") @cached_in_llm_models_root("GPT-2B-001_bf16_tp1.nemo", True) def llm_gpt2_next_model_root(): "get gpt-2b-001_bf16_tp1.nemo" raise RuntimeError("GPT-2B-001_bf16_tp1.nemo must be cached") @pytest.fixture(scope="module") @cached_in_llm_models_root("santacoder", True) def llm_gpt2_santacoder_model_root(): "get santacoder data" raise RuntimeError("santacoder must be cached") @pytest.fixture(scope="module") def llm_gpt2_starcoder_model_root(llm_venv, request): "get starcoder-model" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" starcoder_model_root = os.path.join(models_root, "starcoder-model") if hasattr(request, "param"): if request.param == "starcoder": starcoder_model_root = os.path.join(models_root, "starcoder-model") elif request.param == "starcoderplus": starcoder_model_root = os.path.join(models_root, "starcoderplus") elif request.param == "starcoder2": starcoder_model_root = os.path.join(models_root, "starcoder2-model") return starcoder_model_root @pytest.fixture(scope="module") @cached_in_llm_models_root("starcoder2-3b", True) def llm_gpt2_starcoder2_model_root(): "get starcoder2-3b" raise RuntimeError("starcoder2-3b must be cached") @pytest.fixture(scope="function") def starcoder_model_root(request): models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" if request.param == "starcoder": starcoder_model_root = os.path.join(models_root, "starcoder-model") elif request.param == "starcoder2-15b": starcoder_model_root = os.path.join(models_root, "starcoder2-model") elif request.param == "starcoder2-3b": starcoder_model_root = os.path.join(models_root, "starcoder2-3b") elif request.param == "starcoderplus": starcoder_model_root = os.path.join(models_root, "starcoderplus") assert os.path.exists( starcoder_model_root ), f"{starcoder_model_root} does not exist under NFS LLM_MODELS_ROOT dir" return starcoder_model_root @pytest.fixture(scope="function") def llm_gpt2b_lora_model_root(request): "get gpt2b lora model" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" model_root_list = [] lora_root = os.path.join(models_root, "lora", "gpt-next-2b") if hasattr(request, "param"): if isinstance(request.param, tuple): model_list = list(request.param) else: model_list = [request.param] for item in model_list: if item == "gpt2b_lora-900.nemo": model_root_list.append( os.path.join(lora_root, "gpt2b_lora-900.nemo")) elif item == "gpt2b_lora-stories.nemo": model_root_list.append( os.path.join(lora_root, "gpt2b_lora-stories.nemo")) return ",".join(model_root_list) @pytest.fixture(scope="module") def llama_tokenizer_model_root(): models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" # Use llama-7b-hf to load tokenizer llama_tokenzier_model_root = os.path.join(models_root, "llama-models", "llama-7b-hf") return llama_tokenzier_model_root @pytest.fixture(scope="module") def llama_v2_tokenizer_model_root(): models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" llama_v2_tokenizer_model_root = os.path.join(models_root, "llama-models-v2") assert os.path.exists( llama_v2_tokenizer_model_root ), f"{llama_v2_tokenizer_model_root} does not exist under NFS LLM_MODELS_ROOT dir" return llama_v2_tokenizer_model_root @pytest.fixture(scope="function") def llama_model_root(request): models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" if request.param == "llama-7b": llama_model_root = os.path.join(models_root, "llama-models", "llama-7b-hf") elif request.param == "llama-30b": llama_model_root = os.path.join(models_root, "llama-models", "llama-30b-hf") elif request.param == "TinyLlama-1.1B-Chat-v1.0": llama_model_root = os.path.join(models_root, "llama-models-v2", "TinyLlama-1.1B-Chat-v1.0") elif request.param == "llama-v2-7b": llama_model_root = os.path.join(models_root, "llama-models-v2", "7B") elif request.param == "llama-v2-70b": llama_model_root = os.path.join(models_root, "llama-models-v2", "70B") elif request.param == "llama-v2-70b-hf": llama_model_root = os.path.join(models_root, "llama-models-v2", "llama-v2-70b-hf") elif request.param == "Llama-2-7B-AWQ": llama_model_root = os.path.join(models_root, "llama-models-v2", "Llama-2-7B-AWQ") elif request.param == "Llama-2-7B-GPTQ": llama_model_root = os.path.join(models_root, "llama-models-v2", "Llama-2-7B-GPTQ") elif request.param == "llama-v2-13b-hf": llama_model_root = os.path.join(models_root, "llama-models-v2", "llama-v2-13b-hf") elif request.param == "llama-v2-7b-hf": llama_model_root = os.path.join(models_root, "llama-models-v2", "llama-v2-7b-hf") elif request.param == "llama-v2-70b-hf": llama_model_root = os.path.join(models_root, "llama-models-v2", "llama-v2-70b-hf") elif request.param == "llama-v3-8b-hf": llama_model_root = os.path.join(models_root, "llama-models-v3", "8B") elif request.param == "llama-v3-8b-instruct-hf": llama_model_root = os.path.join(models_root, "llama-models-v3", "llama-v3-8b-instruct-hf") elif request.param == "Llama-3-8B-Instruct-Gradient-1048k": llama_model_root = os.path.join(models_root, "llama-models-v3", "Llama-3-8B-Instruct-Gradient-1048k") elif request.param == "Llama-3-70B-Instruct-Gradient-1048k": llama_model_root = os.path.join(models_root, "llama-models-v3", "Llama-3-70B-Instruct-Gradient-1048k") elif request.param == "llama-3.1-405b": llama_model_root = os.path.join(models_root, "llama-3.1-model", "Meta-Llama-3.1-405B") elif request.param == "llama-3.1-405b-fp8": llama_model_root = os.path.join(models_root, "llama-3.1-model", "Meta-Llama-3.1-405B-FP8") elif request.param == "llama-3.1-70b": llama_model_root = os.path.join(models_root, "llama-3.1-model", "Meta-Llama-3.1-70B") elif request.param == "llama-3.1-8b": llama_model_root = os.path.join(models_root, "llama-3.1-model", "Meta-Llama-3.1-8B") elif request.param == "llama-3.1-8b-instruct-hf-fp8": llama_model_root = os.path.join(models_root, "llama-3.1-model", "Llama-3.1-8B-Instruct-FP8") elif request.param == "llama-3.1-8b-hf-nvfp4": llama_model_root = os.path.join(models_root, "nvfp4-quantized", "Meta-Llama-3.1-8B") elif request.param == "llama-3.1-70b-instruct": llama_model_root = os.path.join(models_root, "llama-3.1-model", "Meta-Llama-3.1-70B-Instruct") elif request.param == "llama-3.2-1b": llama_model_root = os.path.join(models_root, "llama-3.2-models", "Llama-3.2-1B") elif request.param == "llama-3.2-3b": llama_model_root = os.path.join(models_root, "llama-3.2-models", "Llama-3.2-3B") assert os.path.exists( llama_model_root ), f"{llama_model_root} does not exist under NFS LLM_MODELS_ROOT dir" return llama_model_root @pytest.fixture(scope="function") def code_llama_model_root(request): "get CodeLlama model data" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" if request.param == "CodeLlama-7b-Instruct": codellama_model_root = os.path.join(models_root, "codellama", "CodeLlama-7b-Instruct-hf") elif request.param == "CodeLlama-13b-Instruct": codellama_model_root = os.path.join(models_root, "codellama", "CodeLlama-13b-Instruct-hf") elif request.param == "CodeLlama-34b-Instruct": codellama_model_root = os.path.join(models_root, "codellama", "CodeLlama-34b-Instruct-hf") elif request.param == "CodeLlama-70b-hf": codellama_model_root = os.path.join(models_root, "codellama", "CodeLlama-70b-hf") return codellama_model_root @pytest.fixture(scope="function") def draft_target_model_roots(request): models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" draft_model_root = None target_model_root = None if request.param == "gpt2": draft_model_root = os.path.join(models_root, "gpt2-medium") target_model_root = os.path.join(models_root, "gpt2-medium") elif request.param == "llama_v2": draft_model_root = os.path.join(models_root, "llama-models-v2/llama-v2-7b-hf") target_model_root = os.path.join(models_root, "llama-models-v2/llama-v2-13b-hf") assert os.path.exists( draft_model_root ), f"Draft-Target-Model draft model path {draft_model_root} does not exist under NFS LLM_MODELS_ROOT dir" assert os.path.exists( target_model_root ), f"Draft-Target-Model target model path {target_model_root} does not exist under NFS LLM_MODELS_ROOT dir" return draft_model_root, target_model_root @pytest.fixture(scope="function") def prompt_lookup_root(request): models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" if request.param == "gpt2": models_root = os.path.join(models_root, "gpt2-medium") elif request.param == "llama_v2": models_root = os.path.join(models_root, "llama-models-v2/llama-v2-13b-hf") assert os.path.exists( models_root ), f"Prompt-Lookup model path {models_root} does not exist under NFS LLM_MODELS_ROOT dir" return models_root @pytest.fixture(scope="function") def medusa_model_roots(request): models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" base_model_root_for_medusa = None medusa_heads_model_root = None if request.param == "medusa-vicuna-7b-v1.3": base_model_root_for_medusa = os.path.join(models_root, "vicuna-7b-v1.3") medusa_heads_model_root = os.path.join(models_root, "medusa-vicuna-7b-v1.3") elif request.param == "llama3.1-medusa-8b-hf_v0.1": base_model_root_for_medusa = os.path.join(models_root, "llama3.1-medusa-8b-hf_v0.1") medusa_heads_model_root = base_model_root_for_medusa assert os.path.exists( base_model_root_for_medusa ), f"Medusa base model path {base_model_root_for_medusa} does not exist under NFS LLM_MODELS_ROOT dir" assert os.path.exists( medusa_heads_model_root ), f"Medusa heads model path {medusa_heads_model_root} does not exist under NFS LLM_MODELS_ROOT dir" return base_model_root_for_medusa, medusa_heads_model_root @pytest.fixture(scope="function") def lookahead_model_roots(request): models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" base_model_root_for_lookahead = None if request.param == "vicuna-7b-v1.3": base_model_root_for_lookahead = os.path.join(models_root, "vicuna-7b-v1.3") assert os.path.exists( base_model_root_for_lookahead ), f"Lookahead base model path {base_model_root_for_lookahead} does not exist under NFS LLM_MODELS_ROOT dir" return base_model_root_for_lookahead @pytest.fixture(scope="function") def redrafter_model_roots(request): models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" base_model_root_for_redrafter = None redrafter_drafting_model_root = None if request.param == "redrafter-vicuna-7b-v1.3": base_model_root_for_redrafter = os.path.join(models_root, "vicuna-7b-v1.3") redrafter_drafting_model_root = os.path.join( models_root, "redrafter-vicuna-7b-v1.3") assert os.path.exists( base_model_root_for_redrafter ), f"ReDrafter base model path {base_model_root_for_redrafter} does not exist under NFS LLM_MODELS_ROOT dir" assert os.path.exists( redrafter_drafting_model_root ), f"ReDrafter heads model path {redrafter_drafting_model_root} does not exist under NFS LLM_MODELS_ROOT dir" return base_model_root_for_redrafter, redrafter_drafting_model_root @pytest.fixture(scope="function") def eagle_model_roots(request): models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" base_model_root_for_eagle = None eagle_heads_model_root = None if request.param == "EAGLE-Vicuna-7B-v1.3": # Test the checkpoint released from HF, which requires two separate weights, # one for the base model and one for the EagleNets. base_model_root_for_eagle = os.path.join(models_root, "vicuna-7b-v1.3") eagle_heads_model_root = os.path.join(models_root, "EAGLE-Vicuna-7B-v1.3") assert os.path.exists( base_model_root_for_eagle ), f"EAGLE base model path {base_model_root_for_eagle} does not exist under NFS LLM_MODELS_ROOT dir" assert os.path.exists( eagle_heads_model_root ), f"EAGLE heads model path {eagle_heads_model_root} does not exist under NFS LLM_MODELS_ROOT dir" return base_model_root_for_eagle, eagle_heads_model_root elif request.param == "llama3.1-eagle-8b-hf_v0.5": # Test the checkpoint released from ModelOpt, which only requires one weight, # which includes both the base model and EagleNets, and is an FP8 datatype. modelopt_checkpoint_root_for_eagle = os.path.join( models_root, "modelopt-hf-model-hub", "llama3.1-eagle-8b-hf_v0.5") assert os.path.exists( modelopt_checkpoint_root_for_eagle ), f"EAGLE ModelOpt checkpoint path {modelopt_checkpoint_root_for_eagle} does not exist under NFS LLM_MODELS_ROOT dir" return modelopt_checkpoint_root_for_eagle else: assert "Error Eagle weight's name" @pytest.fixture(scope="function") def mamba_model_root(request): "get mamba model data" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" mamba_model_root = os.path.join(models_root, 'mamba', "mamba-130m-hf") if hasattr(request, "param"): if request.param == "mamba-2.8b": mamba_model_root = os.path.join(models_root, 'mamba', "mamba-2.8b-hf") elif request.param == "mamba-130m": mamba_model_root = os.path.join(models_root, 'mamba', "mamba-130m-hf") elif request.param == "mamba-1.4b": mamba_model_root = os.path.join(models_root, 'mamba', "mamba-1.4b-hf") elif request.param == "mamba-790m": mamba_model_root = os.path.join(models_root, 'mamba', "mamba-790m-hf") elif request.param == "mamba-370m": mamba_model_root = os.path.join(models_root, 'mamba', "mamba-370m-hf") elif request.param == "mamba2-2.7b": mamba_model_root = os.path.join(models_root, 'mamba2', "mamba2-2.7b") elif request.param == "mamba2-1.3b": mamba_model_root = os.path.join(models_root, 'mamba2', "mamba2-1.3b") elif request.param == "mamba2-780m": mamba_model_root = os.path.join(models_root, 'mamba2', "mamba2-780m") elif request.param == "mamba2-370m": mamba_model_root = os.path.join(models_root, 'mamba2', "mamba2-370m") elif request.param == "mamba2-130m": mamba_model_root = os.path.join(models_root, 'mamba2', "mamba2-130m") elif request.param == "mamba-codestral-7B-v0.1": mamba_model_root = os.path.join(models_root, 'mamba2', "mamba-codestral-7B-v0.1") assert exists(mamba_model_root), f"{mamba_model_root} does not exist!" return mamba_model_root @pytest.fixture(scope="function") def recurrentgemma_model_root(request): "get recurrentgemma model data" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" assert hasattr(request, "param"), "Param is missing!" if request.param == "recurrentgemma-2b": recurrentgemma_model_root = os.path.join(models_root, "recurrentgemma", "recurrentgemma-2b") elif request.param == "recurrentgemma-2b-it": recurrentgemma_model_root = os.path.join(models_root, "recurrentgemma", "recurrentgemma-2b-it") elif request.param == "recurrentgemma-2b-flax": recurrentgemma_model_root = os.path.join(models_root, "recurrentgemma", "recurrentgemma-2b-flax", "2b") elif request.param == "recurrentgemma-2b-it-flax": recurrentgemma_model_root = os.path.join(models_root, "recurrentgemma", "recurrentgemma-2b-it-flax", "2b-it") assert exists(recurrentgemma_model_root ), f"{recurrentgemma_model_root} does not exist!" return recurrentgemma_model_root @pytest.fixture(scope="function") def nemotron_nas_model_root(request): models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" assert hasattr(request, "param"), "Param is missing!" nemotron_nas_model_root = os.path.join(models_root, "nemotron-nas", request.param) assert exists( nemotron_nas_model_root), f"{nemotron_nas_model_root} doesn't exist!" return nemotron_nas_model_root @pytest.fixture(scope="function") def llm_lora_model_root(request): "get lora model path" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" assert hasattr(request, "param"), "Param is missing!" model_list = [] model_root_list = [] if isinstance(request.param, tuple): model_list = list(request.param) else: model_list = [request.param] for item in model_list: if item == "chinese-llama-2-lora-13b": model_root_list.append( os.path.join(models_root, "llama-models-v2", "chinese-llama-2-lora-13b")) elif item == "Japanese-Alpaca-LoRA-7b-v0": model_root_list.append( os.path.join(models_root, "llama-models", "Japanese-Alpaca-LoRA-7b-v0")) elif item == "luotuo-lora-7b-0.1": model_root_list.append( os.path.join(models_root, "llama-models", "luotuo-lora-7b-0.1")) elif item == "Ko-QWEN-7B-Chat-LoRA": model_root_list.append( os.path.join(models_root, "Ko-QWEN-7B-Chat-LoRA")) elif item == "Qwen1.5-7B-Chat-750Mb-lora": model_root_list.append( os.path.join(models_root, "Qwen1.5-7B-Chat-750Mb-lora")) elif item == "Upcycled-Qwen1.5-MoE2.7B-LoRA": model_root_list.append( os.path.join(models_root, "Upcycled-Qwen1.5-MoE2.7B-LoRA")) elif item == "Phi-3-mini-4k-instruct-ru-lora": model_root_list.append( os.path.join(models_root, "lora", "phi", "Phi-3-mini-4k-instruct-ru-lora")) elif item == "peft-lora-starcoder2-15b-unity-copilot": model_root_list.append( os.path.join(models_root, "lora", "starcoder", "peft-lora-starcoder2-15b-unity-copilot")) elif item == "chinese-mixtral-lora": model_root_list.append( os.path.join(models_root, "chinese-mixtral-lora")) elif item == "komt-mistral-7b-v1-lora": model_root_list.append( os.path.join(models_root, "komt-mistral-7b-v1-lora")) return ",".join(model_root_list) @pytest.fixture(scope="function") def llm_dora_model_root(request): "get dora model path" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" assert hasattr(request, "param"), "Param is missing!" model_list = [] model_root_list = [] if isinstance(request.param, tuple): model_list = list(request.param) else: model_list = [request.param] for item in model_list: if item == "commonsense-llama-v3-8b-dora-r32": model_root_list.append( os.path.join(models_root, "llama-models-v3", "DoRA-weights", "llama_dora_commonsense_checkpoints", "LLama3-8B", "dora_r32")) return ",".join(model_root_list) @pytest.fixture(scope="function") def llm_mistral_model_root(request): "get mistral model path" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" model_root = os.path.join(models_root, "mistral-7b-v0.1") if request.param == "mistral-7b-v0.1": model_root = os.path.join(models_root, "mistral-7b-v0.1") if request.param == "komt-mistral-7b-v1": model_root = os.path.join(models_root, "komt-mistral-7b-v1") if request.param == "mistral-7b-v0.3": model_root = os.path.join(models_root, "Mistral-7B-Instruct-v0.3") return model_root @pytest.fixture(scope="function") def llm_mixtral_model_root(request): "get mixtral model path" models_root = llm_models_root() model_root = os.path.join(models_root, "Mixtral-8x7B-v0.1") assert models_root, "Did you set LLM_MODELS_ROOT?" if request.param == "Mixtral-8x7B-v0.1": model_root = os.path.join(models_root, "Mixtral-8x7B-v0.1") if request.param == "Mixtral-8x22B-v0.1": model_root = os.path.join(models_root, "Mixtral-8x22B-v0.1") if request.param == "Mixtral-8x7B-Instruct-v0.1": model_root = os.path.join(models_root, "Mixtral-8x7B-Instruct-v0.1") return model_root @pytest.fixture(scope="module") @cached_in_llm_models_root("mathstral-7B-v0.1", True) def llm_mathstral_model_root(llm_venv): "return mathstral-7B-v0.1 model root" workspace = llm_venv.get_working_directory() long_mathstral_model_root = os.path.join(workspace, "mathstral-7B-v0.1") return long_mathstral_model_root @pytest.fixture(scope="module") @cached_in_llm_models_root("LongAlpaca-7B", True) def llm_long_alpaca_model_root(llm_venv): "return long alpaca model root" workspace = llm_venv.get_working_directory() long_alpaca_model_root = os.path.join(workspace, "LongAlpaca-7B") return long_alpaca_model_root @pytest.fixture(scope="module") @cached_in_llm_models_root("gpt-neox-20b", True) def llm_gptneox_model_root(llm_venv): "return gptneox model root" workspace = llm_venv.get_working_directory() gptneox_model_root = os.path.join(workspace, "gpt-neox-20b") return gptneox_model_root @pytest.fixture(scope="function") def llm_phi_model_root(request): "return phi model root" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" if 'Phi-3.5' in request.param: phi_model_root = os.path.join(models_root, 'Phi-3.5/' + request.param) elif 'Phi-3' in request.param: phi_model_root = os.path.join(models_root, 'Phi-3/' + request.param) else: phi_model_root = os.path.join(models_root, request.param) assert os.path.exists( phi_model_root ), f"{phi_model_root} does not exist under NFS LLM_MODELS_ROOT dir" return phi_model_root @pytest.fixture(scope="module") @cached_in_llm_models_root("falcon-180b", True) def llm_falcon_180b_model_root(): "prepare falcon 180b model & return falcon model root" raise RuntimeError("falcon 180b must be cached") @pytest.fixture(scope="module") @cached_in_llm_models_root("falcon-11B", True) def llm_falcon_11b_model_root(llm_venv): "prepare falcon-11B model & return falcon model root" workspace = llm_venv.get_working_directory() model_root = os.path.join(workspace, "falcon-11B") call(f"git clone https://huggingface.co/tiiuae/falcon-11B {model_root}", shell=True) return model_root @pytest.fixture(scope="module") @cached_in_llm_models_root("email_composition", True) def llm_gpt2_next_8b_model_root(): raise RuntimeError("gpt-next 8b must be cached") @pytest.fixture(scope="function") def llm_glm_4_9b_model_root(request): "prepare glm-4-9b model & return model path" model_name = request.param models_root = llm_models_root() if model_name == "glm-4-9b": model_root = os.path.join(models_root, "glm-4-9b") elif model_name == "glm-4-9b-chat": model_root = os.path.join(models_root, "glm-4-9b-chat") elif model_name == "glm-4-9b-chat-1m": model_root = os.path.join(models_root, "glm-4-9b-chat-1m") elif model_name == "glm-4v-9b": model_root = os.path.join(models_root, "glm-4v-9b") return model_root @pytest.fixture(scope="module") @cached_in_llm_models_root("internlm-chat-7b", True) def llm_internlm_7b_model_root(llm_venv): "prepare internlm 7b model" workspace = llm_venv.get_working_directory() model_root = os.path.join(workspace, "internlm-chat-7b") call( f"git clone https://huggingface.co/internlm/internlm-chat-7b {model_root}", shell=True) return model_root @pytest.fixture(scope="module") @cached_in_llm_models_root("internlm2-7b", True) def llm_internlm2_7b_model_root(llm_venv): "prepare internlm2 7b model" workspace = llm_venv.get_working_directory() model_root = os.path.join(workspace, "internlm2-7b") call(f"git clone https://huggingface.co/internlm/internlm2-7b {model_root}", shell=True) return model_root @pytest.fixture(scope="module") @cached_in_llm_models_root("internlm-chat-20b", True) def llm_internlm_20b_model_root(llm_venv): "prepare internlm 20b model" workspace = llm_venv.get_working_directory() model_root = os.path.join(workspace, "internlm-chat-20b") call( f"git clone https://huggingface.co/internlm/internlm-chat-20b {model_root}", shell=True) return model_root @pytest.fixture(scope="module") @cached_in_llm_models_root("Qwen-7B-Chat", True) def llm_qwen_7b_model_root(llm_venv): "prepare qwen-7b model & return model path" workspace = llm_venv.get_working_directory() model_root = os.path.join(workspace, "Qwen-7B-Chat") return model_root @pytest.fixture(scope="function") def llm_qwen_model_root(request, llm_venv): "prepare qwen model & return model path" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" qwen_model_root = os.path.join(models_root, "Qwen-7B-Chat") if hasattr(request, "param"): if request.param == "qwen_7b_chat": qwen_model_root = os.path.join(models_root, "Qwen-7B-Chat") elif request.param == "qwen_14b_chat": qwen_model_root = os.path.join(models_root, "Qwen-14B-Chat") elif request.param == "qwen_72b_chat": qwen_model_root = os.path.join(models_root, "Qwen-72B-Chat") elif request.param == "qwen_7b_chat_int4": qwen_model_root = os.path.join(models_root, "Qwen-7B-Chat-Int4") elif request.param == "qwen-vl-chat": qwen_model_root = os.path.join(models_root, "Qwen-VL-Chat") elif request.param == "qwen1.5_7b_chat_awq": qwen_model_root = os.path.join(models_root, "Qwen1.5-7B-Chat-AWQ") elif request.param == "qwen1.5_0.5b_chat": qwen_model_root = os.path.join(models_root, "Qwen1.5-0.5B-Chat") elif request.param == "qwen1.5_7b_chat": qwen_model_root = os.path.join(models_root, "Qwen1.5-7B-Chat") elif request.param == "qwen1.5_14b_chat": qwen_model_root = os.path.join(models_root, "Qwen1.5-14B-Chat") elif request.param == "qwen1.5_moe_a2.7b_chat": qwen_model_root = os.path.join(models_root, "Qwen1.5-MoE-A2.7B-Chat") elif request.param == "qwen1.5_72b_chat": qwen_model_root = os.path.join(models_root, "Qwen1.5-72B-Chat") elif request.param == "qwen1.5_moe_a2.7b_chat": qwen_model_root = os.path.join(models_root, "Qwen1.5-MoE-A2.7B-Chat") elif request.param == "qwen1.5_14b_chat_int4": qwen_model_root = os.path.join(models_root, "Qwen1.5-14B-Chat-GPTQ-Int4") elif request.param == "qwen2_0.5b_instruct": qwen_model_root = os.path.join(models_root, "Qwen2-0.5B-Instruct") elif request.param == "qwen2_7b_instruct": qwen_model_root = os.path.join(models_root, "Qwen2-7B-Instruct") elif request.param == "qwen2_7b_awq": qwen_model_root = os.path.join(models_root, "Qwen2-7B-Instruct-AWQ") elif request.param == "qwen2_57b_a14b": qwen_model_root = os.path.join(models_root, "Qwen2-57B-A14B") elif request.param == "qwen2_72b_instruct": qwen_model_root = os.path.join(models_root, "Qwen2-72B-Instruct") elif request.param == "qwen2_vl_7b_instruct": qwen_model_root = os.path.join(models_root, "Qwen2-VL-7B-Instruct") elif request.param == "qwen2_audio_7b_instruct": qwen_model_root = os.path.join(models_root, "Qwen2-Audio-7B-Instruct") elif request.param == "qwen2.5_0.5b_instruct": qwen_model_root = os.path.join(models_root, "Qwen2.5-0.5B-Instruct") elif request.param == "qwen2.5_1.5b_instruct": qwen_model_root = os.path.join(models_root, "Qwen2.5-1.5B-Instruct") elif request.param == "qwen2.5_7b_instruct": qwen_model_root = os.path.join(models_root, "Qwen2.5-7B-Instruct") elif request.param == "qwen2.5_14b_instruct_int4": qwen_model_root = os.path.join(models_root, "Qwen2.5-14B-Instruct-GPTQ-Int4") elif request.param == "qwen2.5_72b_instruct": qwen_model_root = os.path.join(models_root, "Qwen2.5-72B-Instruct") assert exists(qwen_model_root), f"{qwen_model_root} does not exist!" return qwen_model_root @pytest.fixture(scope="function") def llm_granite_model_root(request): models_root = llm_models_root() model_name = request.param granite_model_root = os.path.join(models_root, model_name) assert exists(granite_model_root), f"{granite_model_root} does not exist!" return granite_model_root @pytest.fixture(scope="session") @cached_in_llm_models_root("nemotron/Nemotron-3-8B-Base-4k.nemo", True) def llm_nemotron_3_8b_model_root(): "get nemotron/Nemotron-3-8B-Base-4k.nemo" raise RuntimeError("nemotron/Nemotron-3-8B-Base-4k.nemo must be cached") @pytest.fixture(scope="session") @cached_in_llm_models_root("nemotron/Nemotron-4-15B-Base.nemo", True) def llm_nemotron_4_15b_model_root(): "get nemotron/Nemotron-4-15B-Base.nemo" raise RuntimeError("nemotron/Nemotron-4-15B-Base.nemo must be cached") @pytest.fixture(scope="session") def mmlu_dataset_root(): models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" mmlu_dataset_root = os.path.join(models_root, "datasets", "mmlu") assert os.path.exists( mmlu_dataset_root ), f"{mmlu_dataset_root} does not exist under NFS LLM_MODELS_ROOT dir" return mmlu_dataset_root @pytest.fixture(scope="function") def deepseek_model_root(request): "get deepseek model" models_root = llm_models_root() assert models_root, "Did you set LLM_MODELS_ROOT?" if request.param == "deepseek-coder-6.7b-instruct": model_root = os.path.join(models_root, "deepseek-coder-6.7b-instruct") return model_root @pytest.fixture(scope="module") def llm_commandr_v01_model_root(llm_venv): "prepare command-r model & return model path" models_root = llm_models_root() model_root = os.path.join(models_root, "c4ai-command-r-v01") return model_root @pytest.fixture(scope="module") def llm_commandr_plus_model_root(llm_venv): "prepare command-r-plus model & return model path" models_root = llm_models_root() model_root = os.path.join(models_root, "c4ai-command-r-plus") return model_root @pytest.fixture(scope="module") def llm_aya_23_8b_model_root(llm_venv): "prepare Aya-23-8B model & return model path" models_root = llm_models_root() model_root = os.path.join(models_root, "aya-23-8B") return model_root @pytest.fixture(scope="module") def llm_aya_23_35b_model_root(llm_venv): "prepare Aya-23-35B model & return model path" models_root = llm_models_root() model_root = os.path.join(models_root, "aya-23-35B") return model_root def evaltool_mmlu_post_process(results_path, baseline, threshold): # Note: In the older version of the lm-harness result file, # there are 57 values. # The latest version of lm-harness includes # 4 additional categories and 1 whole dataset in the result file. # We need to exclude these new categories and # the whole dataset when calculating the average. with open(results_path) as f: result = json.load(f) acc_acc = 0.0 tasks_to_ignore = [ "mmlu_str", "mmlu_str_stem", "mmlu_str_other", "mmlu_str_social_sciences", "mmlu_str_humanities" ] total_task = len(result['results']) - len(tasks_to_ignore) assert total_task == 57 for sub_task in result['results']: if sub_task in tasks_to_ignore: continue acc_acc += float(result['results'][sub_task]['exact_match,none']) avg_acc = acc_acc / total_task print("MMLU avg accuracy:", avg_acc) assert abs(avg_acc - baseline) <= threshold def evaltool_wikilingua_post_process(results_path, baseline, threshold): with open(results_path) as f: result = json.load(f) rouge_l = result['results']['wikilingua_english']['rougeL,none'] print("Wikilingua_english rouge_L:", rouge_l) assert abs(rouge_l - baseline) <= threshold def evaltool_humaneval_post_process(results_path, baseline, threshold): with open(results_path) as f: result = json.load(f) print(result) acc = result[0]['humaneval']['pass@1'] assert abs(acc - baseline) <= threshold def evaltool_mtbench_post_process(results_path, baseline, threshold): with open(results_path) as f: get_result = False for total_score in f: if total_score.startswith('total'): get_result = True total_score = float(total_score.split(',')[1].strip()) assert abs(total_score - baseline) <= threshold assert get_result @pytest.fixture(scope="module") def evaltool_root(llm_venv): if GITLAB_API_USER is None or GITLAB_API_TOKEN is None or EVALTOOL_REPO_URL is None: pytest.skip( "Need to set GITLAB_API_USER, GITLAB_API_TOKEN, and EVALTOOL_REPO_URL env vars to run evaltool tests." ) workspace = llm_venv.get_working_directory() clone_dir = os.path.join(workspace, "eval-tool") repo_url = f"https://{GITLAB_API_USER}:{GITLAB_API_TOKEN}@{EVALTOOL_REPO_URL}" branch_name = "dev/0.9" from evaltool.constants import EVALTOOL_SETUP_SCRIPT evaltool_setup_cmd = [ EVALTOOL_SETUP_SCRIPT, "-b", branch_name, "-d", clone_dir, "-r", repo_url ] call(" ".join(evaltool_setup_cmd), shell=True) return clone_dir @pytest.fixture(scope="function") def engine_dir(llm_venv, capfd): "Get engine dir" engine_path = os.path.join(llm_venv.get_working_directory(), "engines") print_storage_usage(llm_venv.get_working_directory(), "before removing existing engines", capfd) # clean the engine dir for each case. cur_time = time.time() expire = time.time() + 60 while exists(engine_path) and cur_time < expire: shutil.rmtree(engine_path, ignore_errors=True) time.sleep(2) cur_time = time.time() print_storage_usage(llm_venv.get_working_directory(), "after removing existing engines", capfd) return engine_path @pytest.fixture(scope="function") def cmodel_dir(llm_venv): "converted model dir" model_dir = os.path.join(llm_venv.get_working_directory(), "cmodels") yield model_dir if exists(model_dir): shutil.rmtree(model_dir) @pytest.fixture(scope="module") def qcache_dir(llm_venv, llm_root): "get quantization cache dir" defs.ci_profiler.start("qcache_dir") cache_dir = os.path.join(llm_venv.get_working_directory(), "qcache") quantization_root = os.path.join(llm_root, "examples", "quantization") import platform # Fix the issue that the requirements.txt is not available on aarch64. if "aarch64" not in platform.machine() and get_sm_version() >= 89: llm_venv.run_cmd([ "-m", "pip", "install", "-r", os.path.join(quantization_root, "requirements.txt") ]) if not exists(cache_dir): makedirs(cache_dir) yield cache_dir if exists(cache_dir): shutil.rmtree(cache_dir) defs.ci_profiler.stop("qcache_dir") print( f"qcache_dir: {defs.ci_profiler.elapsed_time_in_sec('qcache_dir')} sec") @pytest.fixture(scope="module") def qcache_dir_without_install_package(llm_venv, llm_root): "get quantization cache dir" defs.ci_profiler.start("qcache_dir_without_install_package") cache_dir = os.path.join(llm_venv.get_working_directory(), "qcache") if not exists(cache_dir): makedirs(cache_dir) yield cache_dir if exists(cache_dir): shutil.rmtree(cache_dir) defs.ci_profiler.stop("qcache_dir_without_install_package") print( f"qcache_dir_without_install_package: {defs.ci_profiler.elapsed_time_in_sec('qcache_dir_without_install_package')} sec" ) @pytest.fixture(scope="module") def star_attention_input_root(llm_root): "Get star attention input file dir" star_attention_input_root = unittest_path() / "_torch" / "multi_gpu" return star_attention_input_root @pytest.fixture(autouse=True) def skip_by_device_count(request): "fixture for skip less device count" if request.node.get_closest_marker('skip_less_device'): device_count = get_device_count() expected_count = request.node.get_closest_marker( 'skip_less_device').args[0] if expected_count > int(device_count): pytest.skip( f'Device count {device_count} is less than {expected_count}') @pytest.fixture(autouse=True) def skip_by_device_memory(request): "fixture for skip less device memory" if request.node.get_closest_marker('skip_less_device_memory'): device_memory = get_device_memory() expected_memory = request.node.get_closest_marker( 'skip_less_device_memory').args[0] if expected_memory > int(device_memory): pytest.skip( f'Device memory {device_memory} is less than {expected_memory}') def get_sm_version(): "get compute capability" with tempfile.TemporaryDirectory() as temp_dirname: suffix = ".exe" if is_windows() else "" # TODO: Use NRSU because we can't assume nvidia-smi across all platforms. cmd = " ".join([ "nvidia-smi" + suffix, "--query-gpu=compute_cap", "--format=csv,noheader" ]) output = check_output(cmd, shell=True, cwd=temp_dirname) compute_cap = output.strip().split("\n")[0] sm_major, sm_minor = list(map(int, compute_cap.split("."))) return sm_major * 10 + sm_minor skip_pre_ada = pytest.mark.skipif( get_sm_version() < 89, reason="This test is not supported in pre-Ada architecture") skip_pre_hopper = pytest.mark.skipif( get_sm_version() < 90, reason="This test is not supported in pre-Hopper architecture") skip_pre_blackwell = pytest.mark.skipif( get_sm_version() < 100, reason="This test is not supported in pre-Blackwell architecture") skip_post_blackwell = pytest.mark.skipif( get_sm_version() >= 100, reason="This test is not supported in post-Blackwell architecture") skip_no_nvls = pytest.mark.skipif(not ipc_nvls_supported(), reason="NVLS is not supported") def skip_fp8_pre_ada(use_fp8): "skip fp8 tests if sm version less than 8.9" if use_fp8 and get_sm_version() < 89: pytest.skip("FP8 is not supported on pre-Ada architectures") def skip_fp4_pre_blackwell(use_fp4): "skip fp4 tests if sm version less than 10.0" if use_fp4 and get_sm_version() < 100: pytest.skip("FP4 is not supported on pre-Blackwell architectures") @pytest.fixture(autouse=True) def skip_device_not_contain(request): "skip test if device not contain keyword" if request.node.get_closest_marker('skip_device_not_contain'): keyword_list = request.node.get_closest_marker( 'skip_device_not_contain').args[0] device = get_gpu_device_list()[0] if not any(keyword in device for keyword in keyword_list): pytest.skip( f"Device {device} does not contain keyword in {keyword_list}.") def get_gpu_device_list(): "get device list" with tempfile.TemporaryDirectory() as temp_dirname: suffix = ".exe" if is_windows() else "" # TODO: Use NRSU because we can't assume nvidia-smi across all platforms. cmd = " ".join(["nvidia-smi" + suffix, "-L"]) output = check_output(cmd, shell=True, cwd=temp_dirname) return [l.strip() for l in output.strip().split("\n")] def get_device_count(): "return device count" return len(get_gpu_device_list()) def get_device_memory(): "get gpu memory" memory = 0 with tempfile.TemporaryDirectory() as temp_dirname: suffix = ".exe" if is_windows() else "" # TODO: Use NRSU because we can't assume nvidia-smi across all platforms. cmd = " ".join([ "nvidia-smi" + suffix, "--query-gpu=memory.total", "--format=csv,noheader" ]) output = check_output(cmd, shell=True, cwd=temp_dirname) memory = int(output.strip().split()[0]) return memory # # When test parameters have an empty id, older versions of pytest ignored that parameter when generating the # test node's ID completely. This however was actually a bug, and not expected behavior that got fixed in newer # versions of pytest:https://github.com/pytest-dev/pytest/pull/6607. TRT test defs however rely on this behavior # for quite a few test names. This is a hacky WAR that restores the old behavior back so that the # test names do not change. Note: This might break in a future pytest version. # # TODO: Remove this hack once the test names are fixed. # from _pytest.python import CallSpec2 CallSpec2.id = property( lambda self: "-".join(map(str, filter(None, self._idlist)))) def pytest_addoption(parser): parser.addoption( "--test-list", "-F", action="store", default=None, help="Path to the file containing the list of tests to run") parser.addoption( "--workspace", "--ws", action="store", default=None, help="Workspace path to store temp data generated during the tests") parser.addoption( "--waives-file", "-S", action="store", default=None, help= "Specify a file containing a list of waives, one per line. After filtering collected tests, Pytest will " "apply the waive state specified by this file to the set of tests to be run." ) parser.addoption( "--output-dir", "-O", action="store", default=None, help= "Directory to store test output. Should point to a new or existing empty directory." ) parser.addoption( "--test-prefix", "-P", action="store", default=None, help= "It is useful when using such prefix to mapping waive lists for specific GPU, such as 'GH200'" ) parser.addoption("--regexp", "-R", action='store', default=None, help="A regexp to specify which tests to run") parser.addoption( "--apply-test-list-correction", "-C", action='store_true', help= "Attempt to automatically correct invalid test names in filter files and print the correct name in terminal. " "If the correct name cannot be determined, the invalid test name will be printed to the terminal as well." ) parser.addoption("--perf", action="store_true", help="'--perf' will run perf tests") parser.addoption( "--perf-log-formats", help= "Supply either 'yaml' or 'csv' as values. Supply multiple same flags for multiple formats.", action="append", default=[]) @pytest.hookimpl(trylast=True) def pytest_generate_tests(metafunc: pytest.Metafunc): if metafunc.definition.function.__name__ != 'test_unittests_v2': return testlist_path = metafunc.config.getoption("--test-list") if not testlist_path: return with open(testlist_path, "r") as f: lines = f.readlines() lines = preprocess_test_list_lines(testlist_path, lines) uts = [] for line in lines: if line.startswith("unittest/"): uts.append(line.strip()) metafunc.parametrize("case", uts, ids=lambda x: x) @pytest.hookimpl(tryfirst=True, hookwrapper=True) def pytest_collection_modifyitems(session, config, items): testlist_path = config.getoption("--test-list") waives_file = config.getoption("--waives-file") test_prefix = config.getoption("--test-prefix") perf_test = config.getoption("--perf") if perf_test: global ALL_PYTEST_ITEMS ALL_PYTEST_ITEMS = None import copy # Do not import at global level since that would create cyclic imports. from .perf.test_perf import generate_perf_tests # Perf tests are generated based on the test list to speed up the test collection time. items = generate_perf_tests(session, config, items) ALL_PYTEST_ITEMS = copy.copy(items) if test_prefix: # Override the internal nodeid of each item to contain the correct test prefix. # This is needed for reporting to correctly process the test name in order to bucket # it into the appropriate test suite. for item in items: item._nodeid = "{}/{}".format(test_prefix, item._nodeid) regexp = config.getoption("--regexp") if testlist_path: modify_by_test_list(testlist_path, items, config) if regexp is not None: deselect_by_regex(regexp, items, test_prefix, config) if waives_file: apply_waives(waives_file, items, config) # We have to remove prefix temporarily before splitting the test list # After that change back the test id. for item in items: if test_prefix and item._nodeid.startswith(f"{test_prefix}/"): item._nodeid = item._nodeid[len(f"{test_prefix}/"):] yield for item in items: if test_prefix: item._nodeid = f"{test_prefix}/{item._nodeid}" def deselect_by_regex(regexp, items, test_prefix, config): """Filter out tests based on the patterns specified in the given list of regular expressions. If a test matches *any* of the expressions in the list it is considered selected.""" compiled_regexes = [] regex_list = [] r = re.compile(regexp) compiled_regexes.append(r) regex_list.append(regexp) selected = [] deselected = [] corrections = get_test_name_corrections_v2(set(regex_list), set(it.nodeid for it in items), TestCorrectionMode.REGEX) handle_corrections(corrections, test_prefix) for item in items: found = False for regex in compiled_regexes: if regex.search(item.nodeid): found = True break if found: selected.append(item) else: deselected.append(item) if deselected: config.hook.pytest_deselected(items=deselected) items[:] = selected @pytest.hookimpl(hookwrapper=True) def pytest_runtest_makereport(item, call): outcome = yield report = outcome.get_result() if call.when == "call": report.file = str(item.fspath) report.line = str(item.location[1]) report.url = "" @pytest.fixture(scope="session") def all_pytest_items(): """ Provides all pytest items available in the current test definitions, before any filtering has been applied. """ return ALL_PYTEST_ITEMS @pytest.fixture(scope="session") def turtle_root(): return os.path.dirname(os.path.dirname(__file__)) @pytest.fixture(scope="function") def test_case(request, llm_root): "get test case" test_cases_file = "tests/integration/defs/test_cases.yml" input_file_dir = "tests/integration/test_input_files" test_cases_file_path = os.path.join(llm_root, test_cases_file) case_name = request.param with open(test_cases_file_path, 'r', encoding='UTF-8') as file: test_cases = yaml.safe_load(file) case = test_cases["test_cases"][case_name] input_file = case["input_file"] case["input_file"] = os.path.join(llm_root, input_file_dir, input_file) return case def check_nvlink(): "check nvlink status" with tempfile.TemporaryDirectory() as temp_dirname: try: suffix = ".exe" if is_windows() else "" # TODO: Use NRSU because we can't assume nvidia-smi across all platforms. cmd = " ".join(["nvidia-smi" + suffix, "nvlink", "-s", "-i", "0"]) output = check_output(cmd, shell=True, cwd=temp_dirname) except sp.CalledProcessError: return False if len(output.strip()) == 0: return False return "inActive" not in output.strip() skip_nvlink_inactive = pytest.mark.skipif(check_nvlink() is False, reason="nvlink is inactive.") @pytest.fixture(scope="function") def eval_venv(llm_venv): "set UCC_TEAM_IDS_POOL_SIZE=1024" llm_venv._new_env["UCC_TEAM_IDS_POOL_SIZE"] = "1024" yield llm_venv llm_venv._new_env.pop("UCC_TEAM_IDS_POOL_SIZE") def get_host_total_memory(): "get host memory Mib" memory = psutil.virtual_memory().total return int(memory / 1024 / 1024) @pytest.fixture(autouse=True) def skip_by_host_memory(request): "fixture for skip less host memory" if request.node.get_closest_marker('skip_less_host_memory'): host_memory = get_host_total_memory() expected_memory = request.node.get_closest_marker( 'skip_less_host_memory').args[0] if expected_memory > int(host_memory): pytest.skip( f'Host memory {host_memory} is less than {expected_memory}') IS_UNDER_CI_ENV = 'JENKINS_HOME' in os.environ def collect_status(): if not IS_UNDER_CI_ENV: return import psutil import pynvml pynvml.nvmlInit() handles = { idx: pynvml.nvmlDeviceGetHandleByIndex(idx) for idx in range(pynvml.nvmlDeviceGetCount()) } gpu_memory = {} for idx, device in handles.items(): total_used = pynvml.nvmlDeviceGetMemoryInfo(device).used // 1024 // 1024 total = pynvml.nvmlDeviceGetMemoryInfo(device).total // 1024 // 1024 detail = pynvml.nvmlDeviceGetComputeRunningProcesses(device) process = {} for entry in detail: host_memory_in_mbs = -1 try: host_memory_in_mbs = psutil.Process( entry.pid).memory_full_info().uss // 1024 // 1024 process[entry.pid] = (entry.usedGpuMemory // 1024 // 1024, host_memory_in_mbs) except: pass gpu_memory[idx] = { "total_used": total_used, 'total': total, "process": process } print('\nCurrent memory status:') print(gpu_memory) @pytest.hookimpl(wrapper=True) def pytest_runtest_protocol(item, nextitem): ret = yield collect_status() return ret @pytest.fixture(scope="function") def deterministic_test_root(llm_root, llm_venv): "Get deterministic test root" deterministic_root = os.path.join(llm_root, "tests/integration/defs/deterministic") return deterministic_root @pytest.fixture(scope="function") def disaggregated_test_root(llm_root, llm_venv): "Get disaggregated test root" disaggregated_root = os.path.join(llm_root, "tests/integration/defs/disaggregated") return disaggregated_root