mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-22 19:52:38 +08:00
* fix: Fix/fused moe 0.19 (#3799) * fix bug of stream init Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com> * fix bug Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com> --------- Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com> * fix: Add pre-download of checkpoint before benchmark. (#3772) * Add pre-download of checkpoint before benchmark. Signed-off-by: Frank Di Natale <3429989+FrankD412@users.noreply.github.com> * Add missing remote code flag. Signed-off-by: Frank Di Natale <3429989+FrankD412@users.noreply.github.com> * Move from_pretrained to throughput benchmark. Signed-off-by: Frank Di Natale <3429989+FrankD412@users.noreply.github.com> * Move download and use snapshot_download. Signed-off-by: Frank Di Natale <3429989+FrankD412@users.noreply.github.com> * Removed trusted flag. Signed-off-by: Frank Di Natale <3429989+FrankD412@users.noreply.github.com> * Fix benchmark command in iteration log test. Signed-off-by: Frank Di Natale <3429989+FrankD412@users.noreply.github.com> --------- Signed-off-by: Frank Di Natale <3429989+FrankD412@users.noreply.github.com> * [https://nvbugspro.nvidia.com/bug/5241495][fix] CUDA Graph padding with overlap scheduler (#3839) * fix Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fuse Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fix Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fix Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> --------- Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * TRTLLM-4875 feat: Add version switcher to doc (#3871) Signed-off-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com> * waive a test (#3897) Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com> * docs:fix https://nvbugs/5244616 by removing new invalid links. 
(#3939) Signed-off-by: nv-guomingz <37257613+nv-guomingz@users.noreply.github.com> Co-authored-by: nv-guomingz <37257613+nv-guomingz@users.noreply.github.com> * fix: remote mpi session abort (#3884) * fix remote mpi session Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com> * fix Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com> --------- Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com> * skip fp8 gemm for pre-hopper (#3931) Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com> * [https://nvbugspro.nvidia.com/bug/5247148][fix] Attention DP with overlap scheduler (#3975) * fix Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * update multigpu list Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fix namings Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> --------- Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * Doc: Fix H200 DeepSeek R1 perf doc (#4006) * fix doc Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com> * update perf number Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com> --------- Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com> * Fix the perf regression caused by insufficient cache warmup. (#4042) Force tuning up to 8192 sequence length for NVFP4 linear op. Also, make this runtime-selectable with UB enabled. Signed-off-by: Yukun He <23156053+hyukn@users.noreply.github.com> * doc: Update 0.19.0 release notes (#3976) Signed-off-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com> * Optimize the AutoTuner cache access code to reduce host code overhead. (#4060) The NVFP4 Linear op is very sensitive to the host overhead. This PR introduces customizable `find_nearest_profile` and `get_cache_key_specifc`, which allow users to override the default method for generating the cache key. 
Signed-off-by: Yukun He <23156053+hyukn@users.noreply.github.com> * Update switcher (#4098) Signed-off-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com> * doc: update release notes (#4108) Signed-off-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com> * docs:update 0.19 doc. (#4120) Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com> * docs:add torch flow supported model list. (#4129) Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com> * doc: Release V0.19 Perf Overview Update (#4166) Signed-off-by: zpatel <22306219+zbpatel@users.noreply.github.com> * Fix readme of autodeploy. Signed-off-by: Daniel Campora <961215+dcampora@users.noreply.github.com> * Update tensorrt_llm/_torch/pyexecutor/llm_request.py Co-authored-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> Signed-off-by: Daniel Cámpora <961215+dcampora@users.noreply.github.com> * Revert mgmn worker node. Signed-off-by: Daniel Campora <961215+dcampora@users.noreply.github.com> * Change to disable_overlap_scheduler. 
Signed-off-by: Daniel Campora <961215+dcampora@users.noreply.github.com> --------- Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com> Signed-off-by: Frank Di Natale <3429989+FrankD412@users.noreply.github.com> Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> Signed-off-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com> Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com> Signed-off-by: nv-guomingz <37257613+nv-guomingz@users.noreply.github.com> Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com> Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com> Signed-off-by: Yukun He <23156053+hyukn@users.noreply.github.com> Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com> Signed-off-by: zpatel <22306219+zbpatel@users.noreply.github.com> Signed-off-by: Daniel Campora <961215+dcampora@users.noreply.github.com> Signed-off-by: Daniel Cámpora <961215+dcampora@users.noreply.github.com> Co-authored-by: bhsueh_NV <11360707+byshiue@users.noreply.github.com> Co-authored-by: Frank <3429989+FrankD412@users.noreply.github.com> Co-authored-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> Co-authored-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com> Co-authored-by: Yan Chunwei <328693+Superjomn@users.noreply.github.com> Co-authored-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com> Co-authored-by: nv-guomingz <37257613+nv-guomingz@users.noreply.github.com> Co-authored-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com> Co-authored-by: jiahanc <173873397+jiahanc@users.noreply.github.com> Co-authored-by: Yukun He <23156053+hyukn@users.noreply.github.com> Co-authored-by: Zac Patel <22306219+zbpatel@users.noreply.github.com>
160 lines
4.7 KiB
Python
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

import importlib.util
import os
import subprocess
import sys

import pygit2

# Make helper modules that live next to this conf.py importable (see setup()).
sys.path.insert(0, os.path.abspath('.'))
|
|
|
|
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = 'TensorRT-LLM'
copyright = '2025, NVidia'
author = 'NVidia'

# Short name of the currently checked-out branch; used below to build
# "source:" links pointing at GitHub.
branch_name = pygit2.Repository('.').head.shorthand

html_show_sphinx = False

# Load __version__ straight from tensorrt_llm/version.py so building the
# docs does not require importing the full tensorrt_llm package.
_version_file = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "../../tensorrt_llm/version.py"))
_version_spec = importlib.util.spec_from_file_location(
    "version_module", _version_file)
_version_module = importlib.util.module_from_spec(_version_spec)
_version_spec.loader.exec_module(_version_module)
version = _version_module.__version__
|
|
|
|
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

templates_path = ['_templates']
exclude_patterns = ['performance/performance-tuning-guide/introduction.md']

extensions = [
    'sphinx.ext.duration',
    'sphinx.ext.autodoc',
    'sphinx.ext.autosummary',
    'sphinx.ext.viewcode',
    'sphinx.ext.napoleon',
    'myst_parser',  # markdown support
    'breathe',  # C++ API docs generated from doxygen XML
    'sphinx.ext.todo',
    'sphinx.ext.autosectionlabel',
    'sphinxarg.ext',
    'sphinx_click',
    'sphinx_copybutton',
    'sphinxcontrib.autodoc_pydantic',
]
|
|
|
|
# -- autodoc_pydantic settings ------------------------------------------------
autodoc_pydantic_model_show_json = True
autodoc_pydantic_model_show_config_summary = True
autodoc_pydantic_field_doc_policy = "description"
# Display the field list with per-field descriptions.
autodoc_pydantic_model_show_field_list = True

# -- MyST (markdown) settings -------------------------------------------------
# "source:" links resolve to the current branch of the GitHub repository.
myst_url_schemes = {
    "http": None,
    "https": None,
    "source":
    "https://github.com/NVIDIA/TensorRT-LLM/tree/" + branch_name + "/{{path}}",
}

myst_heading_anchors = 4

myst_enable_extensions = ["deflist"]

autosummary_generate = True

# -- sphinx_copybutton settings -----------------------------------------------
copybutton_exclude = '.linenos, .gp, .go'
copybutton_prompt_text = ">>> |$ |# "
copybutton_line_continuation_character = "\\"
|
|
|
|
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

source_suffix = {
    '.rst': 'restructuredtext',
    '.txt': 'markdown',
    '.md': 'markdown',
    '.json': 'json',
}

html_theme = 'nvidia_sphinx_theme'
html_static_path = ['_static']
html_extra_path = ["./_static/switcher.json"]

# Version switcher dropdown; `version` is read from tensorrt_llm/version.py.
html_theme_options = {
    "switcher": {
        "json_url": "./_static/switcher.json",
        "version_match": version,
        "check_switcher": True,
    },
}
|
|
|
|
# ------------------------ C++ Doc related --------------------------
# Breathe configuration: reads doxygen XML produced into ../cpp_docs/xml.
breathe_default_project = "TensorRT-LLM"
breathe_projects = {"TensorRT-LLM": "../cpp_docs/xml"}

SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

# C++ public headers to document, and where the generated .rst files go.
CPP_INCLUDE_DIR = os.path.join(SCRIPT_DIR, '../../cpp/include/tensorrt_llm')
CPP_GEN_DIR = os.path.join(SCRIPT_DIR, '_cpp_gen')
print('CPP_INCLUDE_DIR', CPP_INCLUDE_DIR)
print('CPP_GEN_DIR', CPP_GEN_DIR)
|
|
|
|
|
|
def setup(app):
    """Sphinx extension hook: generate derived documentation pages.

    `helper` lives next to this conf.py (reachable via the sys.path entry
    added at the top of the file), so it is imported lazily here rather
    than at module import time.
    """
    import helper
    helper.generate_examples()
    helper.generate_llmapi()
|
|
|
|
|
|
def gen_cpp_doc(ofile_name: str, header_dir: str, summary: str):
    """Write an .rst file with a ``doxygenfile`` directive per C++ header.

    Args:
        ofile_name: Path of the .rst file to write (overwritten if present).
        header_dir: Directory scanned (non-recursively) for ``*.h`` files.
        summary: Text emitted verbatim at the top of the generated file.
    """
    # Sort the headers: os.listdir() order is arbitrary and platform
    # dependent, which previously made the generated docs non-reproducible.
    cpp_header_files = sorted(
        file for file in os.listdir(header_dir) if file.endswith('.h'))

    with open(ofile_name, 'w') as ofile:
        ofile.write(summary + "\n")
        for header in cpp_header_files:
            # Section title: the header name underlined to its full width.
            ofile.write(f"{header}\n")
            ofile.write("_" * len(header) + "\n\n")

            ofile.write(f".. doxygenfile:: {header}\n")
            ofile.write(" :project: TensorRT-LLM\n\n")
|
|
|
|
|
|
# Summary prepended to the generated runtime .rst (plain string: the former
# f-string prefix was spurious, there are no placeholders).
runtime_summary = """
Runtime
==========

.. Here are files in the cpp/include/runtime
.. We manually add subsection to enable detailed description in the future
.. It is also doable to automatically generate this file and list all the modules in the conf.py
""".strip()

# Generate the C++ runtime API page. os.makedirs is portable and avoids
# shelling out to `mkdir -p`, which does not exist on all build hosts.
os.makedirs(CPP_GEN_DIR, exist_ok=True)
gen_cpp_doc(CPP_GEN_DIR + '/runtime.rst', CPP_INCLUDE_DIR + '/runtime',
            runtime_summary)
|
|
|
|
# Summary prepended to the generated executor .rst (plain string: the former
# f-string prefix was spurious, there are no placeholders).
executor_summary = """
Executor
==========

.. Here are files in the cpp/include/executor
.. We manually add subsection to enable detailed description in the future
.. It is also doable to automatically generate this file and list all the modules in the conf.py
""".strip()

# Generate the C++ executor API page. os.makedirs is portable and avoids
# shelling out to `mkdir -p`, which does not exist on all build hosts.
os.makedirs(CPP_GEN_DIR, exist_ok=True)
gen_cpp_doc(CPP_GEN_DIR + '/executor.rst', CPP_INCLUDE_DIR + '/executor',
            executor_summary)
|