mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
[Infra]Remove some old keyword (#4552)
Signed-off-by: qqiao <qqiao@nvidia.com>
This commit is contained in:
parent
8cb6163a57
commit
c945e92fdb
@ -1504,7 +1504,7 @@ def runInKubernetes(pipeline, podSpec, containerName)
|
||||
def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
||||
{
|
||||
def dockerArgs = "-v /mnt/scratch.trt_llm_data:/scratch.trt_llm_data:ro -v /tmp/ccache:${CCACHE_DIR}:rw -v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw --cap-add syslog"
|
||||
turtleConfigs = [
|
||||
x86TestConfigs = [
|
||||
"DGX_H100-4_GPUs-PyTorch-DeepSeek-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
|
||||
"DGX_H100-4_GPUs-PyTorch-Others-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
|
||||
"DGX_H100-4_GPUs-CPP-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
|
||||
@ -1579,7 +1579,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
||||
"DGX_H200-4_GPUs-PyTorch-[Post-Merge]-2": ["dgx-h200-x4", "l0_dgx_h200", 2, 2, 4],
|
||||
]
|
||||
|
||||
parallelJobs = turtleConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), {
|
||||
parallelJobs = x86TestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), {
|
||||
def config = VANILLA_CONFIG
|
||||
if (key.contains("single-device")) {
|
||||
config = SINGLE_DEVICE_CONFIG
|
||||
@ -1591,13 +1591,13 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
||||
}]]}
|
||||
fullSet = parallelJobs.keySet()
|
||||
|
||||
slurmX86Configs = [
|
||||
x86SlurmTestConfigs = [
|
||||
"RTXPro6000-PyTorch-[Post-Merge]-1": ["rtx-pro-6000", "l0_rtx_pro_6000", 1, 1],
|
||||
"DGX_B200-4_GPUs-PyTorch-[Post-Merge]-1": ["b200-4-gpus", "l0_dgx_b200", 1, 1, 4],
|
||||
]
|
||||
fullSet += slurmX86Configs.keySet()
|
||||
fullSet += x86SlurmTestConfigs.keySet()
|
||||
|
||||
parallelSlurmJobs = slurmX86Configs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, "slurm", "amd64"), {
|
||||
parallelSlurmJobs = x86SlurmTestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, "slurm", "amd64"), {
|
||||
def config = VANILLA_CONFIG
|
||||
if (key.contains("single-device")) {
|
||||
config = SINGLE_DEVICE_CONFIG
|
||||
@ -1612,25 +1612,25 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
||||
|
||||
// Try to match what are being tested on x86 H100_PCIe.
|
||||
// The total machine time is scaled proportionally according to the number of each GPU.
|
||||
aarch64Configs = [
|
||||
SBSATestConfigs = [
|
||||
"GH200-1": ["gh200", "l0_gh200", 1, 2],
|
||||
"GH200-2": ["gh200", "l0_gh200", 2, 2],
|
||||
"GH200-[Post-Merge]": ["gh200", "l0_gh200", 1, 1],
|
||||
]
|
||||
fullSet += aarch64Configs.keySet()
|
||||
fullSet += SBSATestConfigs.keySet()
|
||||
|
||||
slurmSBSAConfigs = [
|
||||
SBSASlurmTestConfigs = [
|
||||
"GB200-4_GPUs-PyTorch-[Post-Merge]-1": ["gb200-4-gpus", "l0_gb200", 1, 1, 4],
|
||||
]
|
||||
fullSet += slurmSBSAConfigs.keySet()
|
||||
fullSet += SBSASlurmTestConfigs.keySet()
|
||||
|
||||
if (env.targetArch == AARCH64_TRIPLE) {
|
||||
parallelJobs = aarch64Configs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "arm64"), {
|
||||
// Build the Kubernetes-launched SBSA (aarch64) stages.
// These must come from SBSATestConfigs (the renamed aarch64Configs); the
// Slurm-launched entries in SBSASlurmTestConfigs are turned into
// parallelSlurmJobs separately, so iterating them here would both drop the
// GH200 stages and register the Slurm stage names twice.
// Each entry maps a stage name to [pod config, closure that runs the test list].
parallelJobs = SBSATestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "arm64"), {
    // NOTE(review): values presumably holds [platform, test-db name, split id, split count] - confirm against runLLMTestlistOnPlatform.
    runLLMTestlistOnPlatform(pipeline, values[0], values[1], LINUX_AARCH64_CONFIG, false, key, values[2], values[3])
}]]}
|
||||
|
||||
// Add SBSA Slurm jobs
|
||||
parallelSlurmJobs = slurmSBSAConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, "slurm", "arm64"), {
|
||||
parallelSlurmJobs = SBSASlurmTestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, "slurm", "arm64"), {
|
||||
def config = LINUX_AARCH64_CONFIG
|
||||
if (key.contains("single-device")) {
|
||||
config = SINGLE_DEVICE_CONFIG
|
||||
|
||||
@ -1,51 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#
|
||||
# This script collects tensorrt_llm unit tests and transform them into TensorRT TURTLE form.
|
||||
#
|
||||
# Usage:
|
||||
# 1. build and install tensorrt_llm python package
|
||||
# 2. install pytest `pip3 install pytest`
|
||||
# 3. run `python3 scripts/collect_unittests.py` in tensorrt_llm root directory.
|
||||
# 4. update the collected tests into TensorRt TURTLE test.
|
||||
# - check python list `LLM_UNIT_TESTS` in `<tensorrt repo>/tests/trt-test-defs/turtle/defs/llm/test_llm_unittests.py`.
|
||||
from subprocess import check_output
|
||||
|
||||
KEYWORDS = ["<Module", "<UnitTestCase", "<TestCaseFunction"]
|
||||
|
||||
|
||||
def fetch_tests():
    """Print one ``LLMUnitTestCase(...)`` line per collected unit test.

    Runs ``pytest --collect-only tests/`` and scans its output. Lines that
    contain none of the markers in ``KEYWORDS`` are ignored. A ``<Module``
    line updates the current module, a ``<UnitTestCase`` line updates the
    current test class, and every ``<TestCaseFunction`` line prints an entry
    built from the current module, the current class and the function name.
    """
    collected = check_output(["pytest", "--collect-only", "tests/"]).decode()

    # Most recently seen module / test class; "<bad>" until the first one
    # appears in the collection output.
    current_module = "<bad>"
    current_class = "<bad>"

    for raw in collected.split("\n"):
        if not any(marker in raw for marker in KEYWORDS):
            continue

        if "<Module" in raw:
            current_module = raw.replace("<Module ", "").replace(">", "").strip()
            continue

        if "<UnitTestCase" in raw:
            current_class = raw.replace("<UnitTestCase ", "").replace(">", "").strip()
            continue

        if "<TestCaseFunction" in raw:
            case_name = raw.replace("<TestCaseFunction ", "").replace(">", "").strip()
            print(f"LLMUnitTestCase(\"{current_module}\", \"{current_class}.{case_name}\"),")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
fetch_tests()
|
||||
@ -15,7 +15,6 @@ def _clean_files(src_dir: PathLike, extend_files: str) -> None:
|
||||
".devcontainer",
|
||||
"docker/README.md",
|
||||
"jenkins",
|
||||
"scripts/collect_unittests.py",
|
||||
"scripts/package_trt_llm.py",
|
||||
"scripts/git_replace.py",
|
||||
"tests/integration",
|
||||
|
||||
@ -119,7 +119,7 @@ Due to CI hardware resource limitation, and some cases only run on specific GPUs
|
||||
|
||||
In directory `integration/test_lists/test-db`, each yml file corresponds to a GPU type.
|
||||
|
||||
In file `jenkins/L0_Test.groovy`, the variable `turtleConfigs` maps yml files to CI stages.
|
||||
In file `jenkins/L0_Test.groovy`, the variables `x86TestConfigs`, `SBSATestConfigs`, `x86SlurmTestConfigs` and `SBSASlurmTestConfigs` map yml files to CI stages according to platforms and launch methods.
|
||||
|
||||
Currently the yml files are manually maintained, which requires developers to update them when new test cases are added.
|
||||
|
||||
|
||||
@ -2121,7 +2121,7 @@ def all_pytest_items():
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def turtle_root():
|
||||
def test_root():
|
||||
return os.path.dirname(os.path.dirname(__file__))
|
||||
|
||||
|
||||
|
||||
@ -24,8 +24,7 @@ except ImportError:
|
||||
|
||||
@dataclass
|
||||
class BuildConfig:
|
||||
# TODO: Use `tensorrt_llm.builder.BuildConfig` when we switch to Pytest from TURTLE.
|
||||
# Using TURTLE, we cannot do `import tensorrt_llm` in this file.
|
||||
# TODO: Use `tensorrt_llm.builder.BuildConfig` when we switch to use Pytest.
|
||||
max_input_len: int = 256
|
||||
max_seq_len: int = 512
|
||||
opt_batch_size: int = 8
|
||||
|
||||
@ -350,7 +350,7 @@ def write_gpu_monitoring_no_test_results(logpath,
|
||||
|
||||
def get_log(fpath):
|
||||
"""
|
||||
Converts TURTLE log output into an ordered dict of stdout and stderr.
|
||||
Converts log output into an ordered dict of stdout and stderr.
|
||||
Used for raw_result for test_result.
|
||||
|
||||
Args:
|
||||
|
||||
@ -29,7 +29,6 @@ import time
|
||||
import psutil # type: ignore
|
||||
# Nvidia
|
||||
import pynvml # type: ignore
|
||||
# TURTLE
|
||||
from defs.trt_test_alternative import print_info, print_warning
|
||||
|
||||
from .misc import clean_device_product_name
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Miscellaneous utility code used by trt_test. Should contain all code that is agnostic to remote mode vs local mode of TURTLE.
|
||||
Miscellaneous utility code used by trt_test. Should contain all code that is agnostic to remote mode vs local mode.
|
||||
"""
|
||||
import subprocess as sp
|
||||
|
||||
|
||||
@ -71,7 +71,7 @@ class SessionDataWriter:
|
||||
def _write_session_perf_logs(self):
|
||||
"""
|
||||
Write session data. Should only be called once at the end of the entire
|
||||
perf session, in otherwords, when TURTLE ends during teardown().
|
||||
perf session, in other words, only during teardown().
|
||||
"""
|
||||
# Output various log files depending on options.
|
||||
for fmt in self._output_formats:
|
||||
|
||||
@ -274,7 +274,7 @@ class PerfTestMetric(NamedTuple):
|
||||
"""
|
||||
Configurations of a test metric.
|
||||
"""
|
||||
# The original test name used to run the TURTLE test.
|
||||
# The original test name used to run the original perf test.
|
||||
original_test_name: str
|
||||
# The name for this particular metric.
|
||||
metric_name: str
|
||||
@ -759,7 +759,7 @@ class MultiMetricPerfTest(AbstractPerfScriptTestClass):
|
||||
"""
|
||||
|
||||
def __init__(self, full_test_name: str):
|
||||
# full_test_name is the full test name appearing in TURTLE output.
|
||||
# full_test_name is the full test name appearing in test output.
|
||||
self._full_test_name = full_test_name
|
||||
# test_domain_name is the part before "::".
|
||||
self._test_domain_name = "::".join(full_test_name.split("::")[:-1])
|
||||
|
||||
@ -83,7 +83,7 @@ def collect_and_clean_myelin_time(log: str):
|
||||
|
||||
class PerfMetricType(str, Enum):
|
||||
"""
|
||||
An string-enum type to define what kind of perf metric it is. While it is not used by TURTLE, it is used by QA to
|
||||
A string-enum type to define what kind of perf metric it is. It is used by QA to
|
||||
set up special threshold criteria for each type of metrics (like >50MB for engine size increase, etc.).
|
||||
"""
|
||||
INFERENCE_TIME = "INFERENCE_TIME"
|
||||
@ -352,13 +352,12 @@ class AbstractPerfScriptTestClass(abc.ABC):
|
||||
"""
|
||||
Get the absolute threshold used to flag a perf regression compared to perf baseline.
|
||||
Perf comparison will only fail if it exceeds both relative and absolute thresholds.
|
||||
Note: This is not honored by TURTLE for now, but we can add the support later.
|
||||
"""
|
||||
return 0.0
|
||||
|
||||
def get_metric_type(self) -> PerfMetricType:
|
||||
"""
|
||||
Get the type of perf metric. This does not affect TURTLE for now, but QA uses this field to set up special
|
||||
Get the type of perf metric. QA uses this field to set up special
|
||||
threshold criteria depending on the metric type.
|
||||
"""
|
||||
return PerfMetricType.INFERENCE_TIME
|
||||
|
||||
@ -87,7 +87,7 @@ def validate_perf_tests(perf_test_names) -> bool:
|
||||
return passed
|
||||
|
||||
|
||||
def test_list_validation(turtle_root, all_pytest_items, trt_config,
|
||||
def test_list_validation(test_root, all_pytest_items, trt_config,
|
||||
is_trt_environment):
|
||||
|
||||
# Don't run test list validation in TRT environment because TRT uses
|
||||
@ -99,13 +99,13 @@ def test_list_validation(turtle_root, all_pytest_items, trt_config,
|
||||
return
|
||||
|
||||
# Glob all the test list files.
|
||||
test_list_path = os.path.join(turtle_root, "test_lists", "*", "*.txt")
|
||||
test_list_path = os.path.join(test_root, "test_lists", "*", "*.txt")
|
||||
all_test_lists = glob.glob(test_list_path)
|
||||
assert len(all_test_lists
|
||||
) > 0, f"Cannot find any test lists with path {test_list_path}!"
|
||||
|
||||
# Glob all the test db files.
|
||||
test_db_path = os.path.join(turtle_root, "test_lists", "*", "*.yml")
|
||||
test_db_path = os.path.join(test_root, "test_lists", "*", "*.yml")
|
||||
all_test_dbs = glob.glob(test_db_path)
|
||||
assert len(all_test_dbs
|
||||
) > 0, f"Cannot find any test lists with path {test_db_path}!"
|
||||
|
||||
@ -7,33 +7,10 @@ import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
# pytest_plugins = ["pytester", "trt_test.pytest_plugin"]
|
||||
USE_TURTLE = True
|
||||
try:
|
||||
import trt_test # noqa
|
||||
except ImportError:
|
||||
from .test_list_parser import (CorrectionMode, get_test_name_corrections_v2,
|
||||
handle_corrections)
|
||||
from .trt_test_alternative import (SessionDataWriter, check_call,
|
||||
check_output, print_info)
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def trt_config():
|
||||
return None # tekit shall never call this
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def gitlab_token():
|
||||
return None # tekit shall never call this
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def versions_from_infer_device():
|
||||
pass
|
||||
|
||||
USE_TURTLE = False
|
||||
else:
|
||||
from trt_test.misc import check_call, check_output, print_info
|
||||
from trt_test.session_data_writer import SessionDataWriter
|
||||
USE_TURTLE = True
|
||||
from .trt_test_alternative import (SessionDataWriter, check_call, check_output,
|
||||
print_info)
|
||||
|
||||
|
||||
def llm_models_root() -> str:
|
||||
@ -78,9 +55,9 @@ def trt_performance_cache_fpath(trt_config, trt_performance_cache_name):
|
||||
return fpath
|
||||
|
||||
|
||||
# Get the executing turtle case name
|
||||
# Get the executing test case name
|
||||
@pytest.fixture(autouse=True)
|
||||
def turtle_case_name(request):
|
||||
def test_case_name(request):
|
||||
return request.node.nodeid
|
||||
|
||||
|
||||
@ -121,42 +98,11 @@ def llm_session_data_writer(trt_config, trt_gpu_clock_lock,
|
||||
session_data_writer.teardown()
|
||||
|
||||
|
||||
if USE_TURTLE:
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def trt_py3_venv_factory(trt_py_base_venv_factory):
|
||||
"""
|
||||
Session-scoped fixture which provides a factory function to produce a VirtualenvRunner capable of
|
||||
running Python3 code. Used by other session-scoped fixtures which need to modify the default VirtualenvRunner prolog.
|
||||
"""
|
||||
|
||||
# TODO: remove update env after TURTLE support multi devices
|
||||
# Temporarily update CUDA_VISIBLE_DEVICES visible device
|
||||
device_count = get_device_count()
|
||||
visible_devices = ",".join([str(i) for i in range(device_count)])
|
||||
|
||||
print_info(f"Setting CUDA_VISIBLE_DEVICES to {visible_devices}.")
|
||||
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = visible_devices
|
||||
|
||||
def factory():
|
||||
return trt_py_base_venv_factory("python3")
|
||||
|
||||
return factory
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def llm_backend_venv(trt_py3_venv_factory):
|
||||
"""
|
||||
The fixture venv used for LLM tests.
|
||||
"""
|
||||
venv = trt_py3_venv_factory()
|
||||
return venv
|
||||
else:
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def custom_user_workspace(request):
|
||||
return request.config.getoption("--workspace")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def llm_backend_venv(custom_user_workspace):
|
||||
workspace_dir = custom_user_workspace
|
||||
@ -689,70 +635,6 @@ def output_dir(request):
|
||||
return request.config.getoption("--output-dir")
|
||||
|
||||
|
||||
if USE_TURTLE: # perf tests can not run outside turtle for now
|
||||
# Cache all the pytest items so that we can do test list validation.
|
||||
ALL_PYTEST_ITEMS = None # All pytest items available, before deselection.
|
||||
|
||||
@pytest.hookimpl(hookwrapper=True, tryfirst=True)
|
||||
def pytest_collection_modifyitems(session, config, items):
|
||||
# Flush the current stdout line.
|
||||
print()
|
||||
|
||||
import copy
|
||||
|
||||
global ALL_PYTEST_ITEMS
|
||||
ALL_PYTEST_ITEMS = copy.copy(items)
|
||||
_ = yield
|
||||
|
||||
else:
|
||||
#
|
||||
# When test parameters have an empty id, older versions of pytest ignored that parameter when generating the
|
||||
# test node's ID completely. This however was actually a bug, and not expected behavior that got fixed in newer
|
||||
# versions of pytest:https://github.com/pytest-dev/pytest/pull/6607. TRT test defs however rely on this behavior
|
||||
# for quite a few test names. This is a hacky WAR that restores the old behavior back so that the
|
||||
# test names do not change. Note: This might break in a future pytest version.
|
||||
#
|
||||
# TODO: Remove this hack once the test names are fixed.
|
||||
#
|
||||
|
||||
from _pytest.python import CallSpec2
|
||||
CallSpec2.id = property(
|
||||
lambda self: "-".join(map(str, filter(None, self._idlist))))
|
||||
|
||||
# @pytest.hookimpl(tryfirst=True, hookwrapper=True)
|
||||
# def pytest_collection_modifyitems(config, items):
|
||||
# testlist_path = config.getoption("--test-list")
|
||||
# waives_file = config.getoption("--waives-file")
|
||||
# test_prefix = config.getoption("--test-prefix")
|
||||
# if test_prefix:
|
||||
# # Override the internal nodeid of each item to contain the correct test prefix.
|
||||
# # This is needed for reporting to correctly process the test name in order to bucket
|
||||
# # it into the appropriate test suite.
|
||||
# for item in items:
|
||||
# item._nodeid = "{}/{}".format(test_prefix, item._nodeid)
|
||||
|
||||
# regexp = config.getoption("--regexp")
|
||||
|
||||
# if testlist_path:
|
||||
# modify_by_test_list(testlist_path, items, config)
|
||||
|
||||
# if regexp is not None:
|
||||
# deselect_by_regex(regexp, items, test_prefix, config)
|
||||
|
||||
# if waives_file:
|
||||
# apply_waives(waives_file, items, config)
|
||||
|
||||
# # We have to remove prefix temporarily before splitting the test list
|
||||
# # After that change back the test id.
|
||||
# for item in items:
|
||||
# if test_prefix and item._nodeid.startswith(f"{test_prefix}/"):
|
||||
# item._nodeid = item._nodeid[len(f"{test_prefix}/"):]
|
||||
# yield
|
||||
# for item in items:
|
||||
# if test_prefix:
|
||||
# item._nodeid = f"{test_prefix}/{item._nodeid}"
|
||||
|
||||
|
||||
def deselect_by_regex(regexp, items, test_prefix, config):
|
||||
"""Filter out tests based on the patterns specified in the given list of regular expressions.
|
||||
If a test matches *any* of the expressions in the list it is considered selected."""
|
||||
@ -765,8 +647,8 @@ else:
|
||||
selected = []
|
||||
deselected = []
|
||||
|
||||
corrections = get_test_name_corrections_v2(
|
||||
set(regex_list), set(it.nodeid for it in items),
|
||||
corrections = get_test_name_corrections_v2(set(regex_list),
|
||||
set(it.nodeid for it in items),
|
||||
CorrectionMode.REGEX)
|
||||
handle_corrections(corrections, test_prefix)
|
||||
|
||||
|
||||
@ -21,7 +21,7 @@ class PythonVenvRunnerImpl(PythonRunnerInterface):
|
||||
venv_dir (str): Path to the virtualenv root directory, or None if this is
|
||||
an externally-built virtualenv
|
||||
venv_bin (str): Path to the Python executable to use when running tests
|
||||
workspace (str): Path to the TURTLE workspace
|
||||
workspace (str): Path to the test workspace
|
||||
"""
|
||||
|
||||
def __init__(self, pip_opts, venv_dir, venv_bin, workspace):
|
||||
|
||||
@ -1,6 +0,0 @@
|
||||
{
|
||||
"needs_turtle_major": [
|
||||
5,
|
||||
6
|
||||
]
|
||||
}
|
||||
@ -198,29 +198,29 @@ l0_a30:
|
||||
- triton_server/test_triton_llm.py::test_mistral_v1_7b_python_backend[e2e]
|
||||
- triton_server/test_triton_llm.py::test_gpt_350m_python_backend[accuracy]
|
||||
- triton_server/test_triton_llm.py::test_gpt_350m_python_backend[e2e]
|
||||
- triton_server/test_triton_llm.py::test_gpt_350m_ifb[test_basic-False-1-False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap-max_utilization-1-1-1-True-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_llama_v2_7b_ifb[batched_inputs-False-1-False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap-max_utilization-1-1-1-True-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_llava[False-1-False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.7-max_utilization-1-1-1-False-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_medusa_vicuna_7b_ifb[False-1-medusa-False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap-max_utilization-1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_eagle_vicuna_7b_ifb[False-1-eagle-False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap-max_utilization-1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_llama_v2_7b_ifb[test_stop_words-False-1-False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap-max_utilization-1-1-1-True-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_mistral_v1_7b_ifb[False-1-False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap-max_utilization-4096-1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_mistral_v1_multi_models[False-1-False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap-max_utilization-4096-1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_gpt_350m_speculative_decoding_return_logits[False-1-False-True-True-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-max_utilization-1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_t5_small_enc_dec_ifb[test_basic-False-1-top_k_top_p-False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-guaranteed_no_evict-4096-1-1-1-False-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_gpt_speculative_decoding_bls[False-False-1-False-True-True-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-guaranteed_no_evict-1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_benchmark_core_model[llama_v2_7b-False-1-False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-max_utilization-4096-1-1-1-False]
|
||||
- triton_server/test_triton_llm.py::test_benchmark_core_model[gptj_6b-False-1-False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-max_utilization-4096-1-1-1-False]
|
||||
- triton_server/test_triton_llm.py::test_gpt_speculative_decoding_bls[True-False-1-False-True-True-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-max_utilization-1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_gpt_speculative_decoding_bls[True-False-1-False-True-True-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-guaranteed_no_evict-1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_whisper_large_v3_ifb[True-1-top_k_top_p-False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-0.5-guaranteed_no_evict-24000-1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_llava_onevision[test_video-False-1-False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-guaranteed_no_evict-1-1-1-False-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_llava_onevision[test_basic-False-1-False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-max_utilization-1-1-1-False-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_tiny_llama_1b_guided_decoding[xgrammar-tensorrtllm-True-1-False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-guaranteed_no_evict-1-1-1-False-ensemble-accuracy]
|
||||
- triton_server/test_triton_llm.py::test_tiny_llama_1b_guided_decoding[xgrammar-python-True-1-False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-guaranteed_no_evict-1-1-1-False-ensemble-accuracy]
|
||||
- triton_server/test_triton_llm.py::test_gpt_disaggregated_serving_bls[test_basic-False-1-top_k_top_p-False-True-True-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-max_utilization-1-1-1-True-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_llama_v2_70b_ifb_lad[7-7-7-False-1-lookahead-False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-max_utilization-1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_gpt_350m_ifb[test_basic-False-1---False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--max_utilization---1-1-1-True-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_llama_v2_7b_ifb[batched_inputs-False-1---False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--max_utilization---1-1-1-True-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_llava[False-1---False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.7-max_utilization---1-1-1-False-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_medusa_vicuna_7b_ifb[False-1-medusa--False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--max_utilization---1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_eagle_vicuna_7b_ifb[False-1-eagle--False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--max_utilization---1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_llama_v2_7b_ifb[test_stop_words-False-1---False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--max_utilization---1-1-1-True-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_mistral_v1_7b_ifb[False-1---False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--max_utilization-4096--1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_mistral_v1_multi_models[False-1---False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--max_utilization-4096--1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_gpt_350m_speculative_decoding_return_logits[False-1---False-True-True-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-max_utilization---1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_t5_small_enc_dec_ifb[test_basic-False-1-top_k_top_p--False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap---guaranteed_no_evict--4096-1-1-1-False-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_gpt_speculative_decoding_bls[False-False-1---False-True-True-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-guaranteed_no_evict---1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_benchmark_core_model[llama_v2_7b-False-1---False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--max_utilization-4096--1-1-1-False]
|
||||
- triton_server/test_triton_llm.py::test_benchmark_core_model[gptj_6b-False-1---False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--max_utilization-4096--1-1-1-False]
|
||||
- triton_server/test_triton_llm.py::test_gpt_speculative_decoding_bls[True-False-1---False-True-True-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-max_utilization---1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_gpt_speculative_decoding_bls[True-False-1---False-True-True-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-guaranteed_no_evict---1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_whisper_large_v3_ifb[True-1-top_k_top_p--False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-0.5-guaranteed_no_evict--24000-1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_llava_onevision[test_video-False-1---False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-guaranteed_no_evict---1-1-1-False-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_llava_onevision[test_basic-False-1---False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-max_utilization---1-1-1-False-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_tiny_llama_1b_guided_decoding[xgrammar-tensorrtllm-True-1---False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--guaranteed_no_evict---1-1-1-False-ensemble-accuracy]
|
||||
- triton_server/test_triton_llm.py::test_tiny_llama_1b_guided_decoding[xgrammar-python-True-1---False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--guaranteed_no_evict---1-1-1-False-ensemble-accuracy]
|
||||
- triton_server/test_triton_llm.py::test_gpt_disaggregated_serving_bls[test_basic-False-1-top_k_top_p--False-True-True-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap-0.2-max_utilization---1-1-1-True-tensorrt_llm_bls]
|
||||
- triton_server/test_triton_llm.py::test_llama_v2_70b_ifb_lad[7-7-7-False-1-lookahead--False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--max_utilization---1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_llm.py::test_llmapi_backend[1-0-disableDecoupleMode-tensorrt_llm]
|
||||
- triton_server/test_triton_llm.py::test_llmapi_backend[1-0-enableDecoupleMode-tensorrt_llm]
|
||||
- triton_server/test_triton_rcca.py::test_mistral_beam_search[rcca_4714407-True-10-False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap-guaranteed_no_evict-1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_rcca.py::test_rcca_bug_4934893[Temperature:0.5-TOP_P:0.95-TOP_K:10-False-1-False-True-False-0-2048-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap-max_utilization-1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_rcca.py::test_mistral_beam_search[rcca_4714407-True-10---False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--guaranteed_no_evict---1-1-1-False-ensemble]
|
||||
- triton_server/test_triton_rcca.py::test_rcca_bug_4934893[Temperature:0.5-TOP_P:0.95-TOP_K:10-False-1---False-True-False-0-2048-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--max_utilization---1-1-1-False-ensemble]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user