mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-14 06:27:45 +08:00)
109 lines · 4.0 KiB · Python

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pathlib import Path
from typing import Any, Dict, Iterator, List, Tuple

import yaml
from pydantic import BaseModel, Field, RootModel

REPO_ROOT = Path(__file__).parent.parent.parent.parent
DATABASE_LIST_PATH = Path(__file__).parent / "lookup.yaml"

# Concurrency thresholds used to classify a scenario that has only a single recipe.
LOW_LATENCY_CONCURRENCY_THRESHOLD = 8
HIGH_THROUGHPUT_CONCURRENCY_THRESHOLD = 32
KEY_PROFILES = {"Min Latency", "Balanced", "Max Throughput"}


class Recipe(BaseModel):
    """Recipe record for scenario list."""

    model: str = Field(description="Model name")
    gpu: str = Field(description="GPU name")
    isl: int = Field(description="Input sequence length")
    osl: int = Field(description="Output sequence length")
    concurrency: int = Field(description="Concurrency")
    config_path: str = Field(description="Configuration path")
    num_gpus: int = Field(description="Number of GPUs")

    def load_config(self) -> Dict[str, Any]:
        """Load and return the YAML config at config_path."""
        config_relative_path = Path(self.config_path)
        # Ensure config path is within the repo root
        if config_relative_path.is_absolute() or ".." in config_relative_path.parts:
            raise ValueError(f"Invalid config path: {self.config_path}")
        full_path = REPO_ROOT / self.config_path
        if not full_path.exists():
            raise FileNotFoundError(f"Config not found: {full_path}")
        with open(full_path, encoding="utf-8") as f:
            return yaml.safe_load(f)

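# Illustrative sketch (not taken from the repo): a single lookup.yaml entry maps onto the
# Recipe fields above, and load_config() resolves config_path relative to REPO_ROOT.
# The values and paths below are hypothetical.
#
#   - model: example-model-8b                       # hypothetical model name
#     gpu: H100                                     # hypothetical GPU name
#     isl: 1024
#     osl: 1024
#     concurrency: 8
#     config_path: examples/configs/example.yaml    # hypothetical path inside the repo
#     num_gpus: 1
#
#   recipe = Recipe(model="example-model-8b", gpu="H100", isl=1024, osl=1024,
#                   concurrency=8, config_path="examples/configs/example.yaml", num_gpus=1)
#   config = recipe.load_config()  # raises if the path escapes REPO_ROOT or does not exist
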
class RecipeList(RootModel[List[Recipe]]):
    """Validated list of Recipe records (the YAML root is a top-level list)."""

    @classmethod
    def from_yaml(cls, yaml_path: Path) -> "RecipeList":
        """Load and validate recipe list from YAML file."""
        with open(yaml_path, encoding="utf-8") as f:
            data = yaml.safe_load(f)
        return cls(data)

    def __iter__(self) -> Iterator[Recipe]:
        return iter(self.root)

    def __len__(self) -> int:
        return len(self.root)

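# Illustrative usage sketch (assumes lookup.yaml exists next to this module, as
# DATABASE_LIST_PATH implies):
#
#   recipes = RecipeList.from_yaml(DATABASE_LIST_PATH)
#   for recipe in recipes:
#       print(recipe.model, recipe.gpu, recipe.concurrency)
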
def assign_profile(num_recipes: int, idx: int, concurrency: int) -> str:
    """Assign performance profile to a recipe based on its position in a concurrency-sorted list."""
    if num_recipes == 1:
        if concurrency <= LOW_LATENCY_CONCURRENCY_THRESHOLD:
            return "Low Latency"
        elif concurrency >= HIGH_THROUGHPUT_CONCURRENCY_THRESHOLD:
            return "High Throughput"
        else:
            return "Balanced"
    elif idx == 0:
        return "Min Latency"
    elif idx == num_recipes - 1:
        return "Max Throughput"
    elif idx in ((num_recipes - 1) // 2, num_recipes // 2):
        return "Balanced"
    elif idx < num_recipes // 2:
        return "Low Latency"
    else:
        return "High Throughput"

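# Worked example (derived from the branches above):
#   num_recipes == 5 -> indices map to:
#     0: "Min Latency", 1: "Low Latency", 2: "Balanced",
#     3: "High Throughput", 4: "Max Throughput"
#   num_recipes == 4 -> indices 1 and 2 both map to "Balanced",
#   which is why select_key_recipes below deduplicates profiles:
#     0: "Min Latency", 1: "Balanced", 2: "Balanced", 3: "Max Throughput"
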
def select_key_recipes(recipes: List[Recipe]) -> List[Tuple[Recipe, str]]:
    """Select key recipes (min latency, balanced, max throughput) from a list of recipes."""
    if not recipes:
        return []

    sorted_recipes = sorted(recipes, key=lambda r: r.concurrency)
    n = len(sorted_recipes)

    result = []
    seen_profiles = set()
    for idx, recipe in enumerate(sorted_recipes):
        profile = assign_profile(n, idx, recipe.concurrency)
        # For n==1, keep whatever profile is assigned
        # For n>=2, only keep key profiles and dedupe (for even n, two indices get "Balanced")
        if n == 1 or (profile in KEY_PROFILES and profile not in seen_profiles):
            result.append((recipe, profile))
            seen_profiles.add(profile)
    return result
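

# Minimal, self-contained usage sketch (illustrative only; the recipes below are
# hypothetical and not taken from lookup.yaml). Running the module directly prints
# the key recipes selected from a small in-memory list.
if __name__ == "__main__":
    demo_recipes = [
        Recipe(model="example-model", gpu="H100", isl=1024, osl=1024,
               concurrency=c, config_path=f"examples/configs/c{c}.yaml", num_gpus=1)
        for c in (1, 4, 16, 64, 256)
    ]
    # Expected output: Min Latency (c=1), Balanced (c=16), Max Throughput (c=256)
    for recipe, profile in select_key_recipes(demo_recipes):
        print(f"{profile}: concurrency={recipe.concurrency}")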