Functionals
===========
<dl class="py class" id="module-tensorrt_llm.functional">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.AllReduceFusionOp">
|
||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">AllReduceFusionOp</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">value</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">names=<not</span> <span class="pre">given></span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*values</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">module=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">qualname=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">type=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">start=1</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">boundary=None</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#AllReduceFusionOp"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.AllReduceFusionOp" title="Link to this definition">#</a></dt>
|
||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">IntEnum</span></code></p>
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.AllReduceFusionOp.LAST_PROCESS_FOR_UB">
|
||
<span class="sig-name descname"><span class="pre">LAST_PROCESS_FOR_UB</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">2</span></em><a class="headerlink" href="#tensorrt_llm.functional.AllReduceFusionOp.LAST_PROCESS_FOR_UB" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.AllReduceFusionOp.MOE_ALLREDUCE_RESIDUAL_RMS_NORM">
|
||
<span class="sig-name descname"><span class="pre">MOE_ALLREDUCE_RESIDUAL_RMS_NORM</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">8</span></em><a class="headerlink" href="#tensorrt_llm.functional.AllReduceFusionOp.MOE_ALLREDUCE_RESIDUAL_RMS_NORM" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.AllReduceFusionOp.NONE">
|
||
<span class="sig-name descname"><span class="pre">NONE</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">0</span></em><a class="headerlink" href="#tensorrt_llm.functional.AllReduceFusionOp.NONE" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM">
|
||
<span class="sig-name descname"><span class="pre">RESIDUAL_RMS_NORM</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1</span></em><a class="headerlink" href="#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_OUT_QUANT_FP8">
|
||
<span class="sig-name descname"><span class="pre">RESIDUAL_RMS_NORM_OUT_QUANT_FP8</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">6</span></em><a class="headerlink" href="#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_OUT_QUANT_FP8" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_OUT_QUANT_NVFP4">
|
||
<span class="sig-name descname"><span class="pre">RESIDUAL_RMS_NORM_OUT_QUANT_NVFP4</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">7</span></em><a class="headerlink" href="#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_OUT_QUANT_NVFP4" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_QUANT_FP8">
|
||
<span class="sig-name descname"><span class="pre">RESIDUAL_RMS_NORM_QUANT_FP8</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">4</span></em><a class="headerlink" href="#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_QUANT_FP8" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_QUANT_NVFP4">
|
||
<span class="sig-name descname"><span class="pre">RESIDUAL_RMS_NORM_QUANT_NVFP4</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">5</span></em><a class="headerlink" href="#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_QUANT_NVFP4" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_PREPOST_NORM">
|
||
<span class="sig-name descname"><span class="pre">RESIDUAL_RMS_PREPOST_NORM</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">3</span></em><a class="headerlink" href="#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_PREPOST_NORM" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
</dd></dl>
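Because the fusion op derives from IntEnum, its members compare and convert
like plain integers. A minimal sketch using only the values listed above:

    from tensorrt_llm.functional import AllReduceFusionOp

    # Members behave like ints because the enum derives from IntEnum.
    op = AllReduceFusionOp.RESIDUAL_RMS_NORM
    assert op == 1 and int(op) == 1

    # Round-trip from a raw integer, e.g. one read from a serialized config.
    assert AllReduceFusionOp(4) is AllReduceFusionOp.RESIDUAL_RMS_NORM_QUANT_FP8

    # NONE disables fusion entirely.
    no_fusion = AllReduceFusionOp.NONE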
class tensorrt_llm.functional.AllReduceParams(
    strategy: AllReduceStrategy = AllReduceStrategy.AUTO,
    fusion_op: AllReduceFusionOp = AllReduceFusionOp.NONE,
    bias: Tensor | None = None,
    residual: Tensor | None = None,
    norm_weight: Tensor | None = None,
    scale: Tensor | None = None,
    norm_pre_residual_weight: Tensor | None = None,
    eps: float = 1e-06,
    enable_allreduce: bool = True,
)

    Bases: object

    has_affine()
    has_bias()
    has_scale()
    update_strategy()
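A sketch constructing AllReduceParams for a fused residual-add + RMSNorm
all-reduce. The two placeholder tensors stand in for graph tensors and are
set to None here so the snippet stays self-contained; the has_* helpers are
assumed (from their names) to report which optional tensors were supplied.

    from tensorrt_llm.functional import (AllReduceFusionOp, AllReduceParams,
                                         AllReduceStrategy)

    # Placeholders: in a real network definition these would be Tensor objects.
    residual = None
    norm_weight = None

    params = AllReduceParams(
        strategy=AllReduceStrategy.AUTO,                # let the runtime choose
        fusion_op=AllReduceFusionOp.RESIDUAL_RMS_NORM,  # fuse residual + RMSNorm
        residual=residual,
        norm_weight=norm_weight,
        eps=1e-5,
    )

    # Presumably reflect which optional tensors were provided (assumption).
    print(params.has_bias(), params.has_scale(), params.has_affine())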
class tensorrt_llm.functional.AllReduceStrategy(
    value,
    names=<not given>,
    *values,
    module=None,
    qualname=None,
    type=None,
    start=1,
    boundary=None,
)

    Bases: IntEnum

    AUTO = 3
    MIN_LATENCY = 1
    NCCL = 0
    ONESHOT = 4
    TWOSHOT = 5
    UB = 2
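The strategy enum lets a caller pin a specific all-reduce implementation
instead of leaving the choice to AUTO. A hypothetical selection heuristic;
the size threshold and the preference for ONESHOT on small payloads are
illustrative assumptions, not values from this page:

    from tensorrt_llm.functional import AllReduceStrategy

    def pick_strategy(message_bytes: int,
                      have_custom_kernels: bool) -> AllReduceStrategy:
        """Illustrative only: thresholds and fallbacks are assumptions."""
        if not have_custom_kernels:
            return AllReduceStrategy.NCCL      # generic library path
        if message_bytes <= 1 << 20:           # small payloads: one phase
            return AllReduceStrategy.ONESHOT
        return AllReduceStrategy.TWOSHOT       # large payloads: two phases

    assert pick_strategy(4096, True) is AllReduceStrategy.ONESHOT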
class tensorrt_llm.functional.AttentionMaskType(
    value,
    names=<not given>,
    *values,
    module=None,
    qualname=None,
    type=None,
    start=1,
    boundary=None,
)

    Bases: IntEnum

    bidirectional = 3
    bidirectionalglm = 4
    blocksparse = 5
    causal = 1
    custom_mask = 6
    padding = 0
    sliding_window_causal = 2
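A hypothetical helper mapping common model configurations onto these mask
types; the mapping rules are illustrative assumptions, and real models may
need the other members:

    from tensorrt_llm.functional import AttentionMaskType

    def mask_for(architecture: str,
                 window: int | None = None) -> AttentionMaskType:
        """Illustrative mapping from model shape to mask type."""
        if architecture == "encoder":        # full bidirectional attention
            return AttentionMaskType.bidirectional
        if window is not None:               # decoder with a limited window
            return AttentionMaskType.sliding_window_causal
        return AttentionMaskType.causal      # standard autoregressive decoder

    assert mask_for("decoder") is AttentionMaskType.causal
    assert mask_for("decoder", window=4096) is AttentionMaskType.sliding_window_causal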
class tensorrt_llm.functional.Conditional(condition: Tensor)

    Bases: object

    Add an operation to conditionally execute two code paths/subgraphs.

    Usage:
        1. conditional = Conditional(condition)
        2. input_1_ = conditional.add_input(input_1)
           ...
           input_n_ = conditional.add_input(input_n)
        3. Construct the graph to get true_output_value and false_output_value
           using input_1_, ..., input_n_
        4. output = conditional.add_output(true_output_value, false_output_value)

    add_input(input: Tensor) -> Tensor
    add_output(true_value: Tensor, false_value: Tensor) -> Tensor
|
||
|
||
<dl class="py class">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.DimRange">
|
||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">DimRange</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">shape</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">names</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#DimRange"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.DimRange" title="Link to this definition">#</a></dt>
|
||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
|
||
<p>One DimRange object stores the ranges of all the dimensions of one tensor in one optimization profile.
|
||
For example, tensor has 2 dimensions. Then the data members are:</p>
|
||
<blockquote>
|
||
<div><p>self.min = [dim 0 min, dim 1 min]
|
||
self.opt = [dim 0 opt, dim 1 opt]
|
||
self.max = [dim 0 max, dim 1 max]</p>
|
||
</div></blockquote>
|
||
<p>For static dimension, it has min==opt==max, thus the shape param in the ctor can be an integer</p>
|
||
</dd></dl>
|
||
|
||
<dl class="py class">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.LayerNormPositionType">
|
||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">LayerNormPositionType</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">value</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">names=<not</span> <span class="pre">given></span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*values</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">module=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">qualname=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">type=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">start=1</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">boundary=None</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#LayerNormPositionType"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.LayerNormPositionType" title="Link to this definition">#</a></dt>
|
||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">IntEnum</span></code></p>
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.LayerNormPositionType.post_layernorm">
|
||
<span class="sig-name descname"><span class="pre">post_layernorm</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1</span></em><a class="headerlink" href="#tensorrt_llm.functional.LayerNormPositionType.post_layernorm" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.LayerNormPositionType.pre_layernorm">
|
||
<span class="sig-name descname"><span class="pre">pre_layernorm</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">0</span></em><a class="headerlink" href="#tensorrt_llm.functional.LayerNormPositionType.pre_layernorm" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
</dd></dl>
|
||
|
||
<dl class="py class">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.LayerNormType">
|
||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">LayerNormType</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">value</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">names=<not</span> <span class="pre">given></span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*values</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">module=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">qualname=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">type=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">start=1</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">boundary=None</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#LayerNormType"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.LayerNormType" title="Link to this definition">#</a></dt>
|
||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">IntEnum</span></code></p>
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.LayerNormType.GroupNorm">
|
||
<span class="sig-name descname"><span class="pre">GroupNorm</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">2</span></em><a class="headerlink" href="#tensorrt_llm.functional.LayerNormType.GroupNorm" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.LayerNormType.LayerNorm">
|
||
<span class="sig-name descname"><span class="pre">LayerNorm</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">0</span></em><a class="headerlink" href="#tensorrt_llm.functional.LayerNormType.LayerNorm" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.LayerNormType.RmsNorm">
|
||
<span class="sig-name descname"><span class="pre">RmsNorm</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1</span></em><a class="headerlink" href="#tensorrt_llm.functional.LayerNormType.RmsNorm" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
</dd></dl>
|
||
|
||
<dl class="py class">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.MLPType">
|
||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">MLPType</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">value</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">names=<not</span> <span class="pre">given></span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*values</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">module=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">qualname=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">type=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">start=1</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">boundary=None</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#MLPType"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.MLPType" title="Link to this definition">#</a></dt>
|
||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">IntEnum</span></code></p>
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.MLPType.FusedGatedMLP">
|
||
<span class="sig-name descname"><span class="pre">FusedGatedMLP</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">2</span></em><a class="headerlink" href="#tensorrt_llm.functional.MLPType.FusedGatedMLP" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.MLPType.GatedMLP">
|
||
<span class="sig-name descname"><span class="pre">GatedMLP</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1</span></em><a class="headerlink" href="#tensorrt_llm.functional.MLPType.GatedMLP" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.MLPType.MLP">
|
||
<span class="sig-name descname"><span class="pre">MLP</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">0</span></em><a class="headerlink" href="#tensorrt_llm.functional.MLPType.MLP" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
</dd></dl>
|
||
|
||
<dl class="py class">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType">
|
||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">PositionEmbeddingType</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">value</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">names=<not</span> <span class="pre">given></span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*values</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">module=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">qualname=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">type=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">start=1</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">boundary=None</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#PositionEmbeddingType"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType" title="Link to this definition">#</a></dt>
|
||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">IntEnum</span></code></p>
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.alibi">
|
||
<span class="sig-name descname"><span class="pre">alibi</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">4</span></em><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.alibi" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.alibi_with_scale">
|
||
<span class="sig-name descname"><span class="pre">alibi_with_scale</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">5</span></em><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.alibi_with_scale" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.chatglm">
|
||
<span class="sig-name descname"><span class="pre">chatglm</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">7</span></em><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.chatglm" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.choices">
|
||
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">choices</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#PositionEmbeddingType.choices"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.choices" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.deferred">
|
||
<span class="sig-name descname"><span class="pre">deferred</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">10</span></em><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.deferred" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.from_string">
|
||
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">from_string</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">s</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#PositionEmbeddingType.from_string"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.from_string" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.is_alibi">
|
||
<span class="sig-name descname"><span class="pre">is_alibi</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">bool</span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#PositionEmbeddingType.is_alibi"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.is_alibi" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.is_deferred">
|
||
<span class="sig-name descname"><span class="pre">is_deferred</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">bool</span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#PositionEmbeddingType.is_deferred"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.is_deferred" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.is_mrope">
|
||
<span class="sig-name descname"><span class="pre">is_mrope</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">bool</span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#PositionEmbeddingType.is_mrope"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.is_mrope" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.is_rope">
|
||
<span class="sig-name descname"><span class="pre">is_rope</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">bool</span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#PositionEmbeddingType.is_rope"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.is_rope" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.learned_absolute">
|
||
<span class="sig-name descname"><span class="pre">learned_absolute</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">0</span></em><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.learned_absolute" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.long_rope">
|
||
<span class="sig-name descname"><span class="pre">long_rope</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">3</span></em><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.long_rope" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.mrope">
|
||
<span class="sig-name descname"><span class="pre">mrope</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">9</span></em><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.mrope" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.relative">
|
||
<span class="sig-name descname"><span class="pre">relative</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">6</span></em><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.relative" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.rope_gpt_neox">
|
||
<span class="sig-name descname"><span class="pre">rope_gpt_neox</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">2</span></em><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.rope_gpt_neox" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.rope_gptj">
|
||
<span class="sig-name descname"><span class="pre">rope_gptj</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1</span></em><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.rope_gptj" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.PositionEmbeddingType.yarn">
|
||
<span class="sig-name descname"><span class="pre">yarn</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">8</span></em><a class="headerlink" href="#tensorrt_llm.functional.PositionEmbeddingType.yarn" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
</dd></dl>
|
||
|
||
<dl class="py class">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RopeEmbeddingUtils">
|
||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">RopeEmbeddingUtils</span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RopeEmbeddingUtils"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RopeEmbeddingUtils" title="Link to this definition">#</a></dt>
|
||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RopeEmbeddingUtils.apply_llama3_scaling">
|
||
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">apply_llama3_scaling</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">inv_freqs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">ndarray</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">rope_scaling_config</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RopeEmbeddingUtils.apply_llama3_scaling"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RopeEmbeddingUtils.apply_llama3_scaling" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb">
|
||
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">apply_rotary_pos_emb</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">tensor</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">position_embedding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">pos_emb_type</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.PositionEmbeddingType" title="tensorrt_llm.functional.PositionEmbeddingType"><span class="pre">PositionEmbeddingType</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">PositionEmbeddingType.rope_gptj</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RopeEmbeddingUtils.apply_rotary_pos_emb"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb_chatglm">
|
||
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">apply_rotary_pos_emb_chatglm</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">qkv</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">position_embedding</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">num_attention_heads</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">attention_head_size</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">rotary_embedding_scale</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">remove_input_padding</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RopeEmbeddingUtils.apply_rotary_pos_emb_chatglm"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb_chatglm" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb_cogvlm">
|
||
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">apply_rotary_pos_emb_cogvlm</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">qkv</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">position_embedding</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">num_attention_heads</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">attention_head_size</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">rotary_embedding_scale</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">remove_input_padding</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RopeEmbeddingUtils.apply_rotary_pos_emb_cogvlm"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb_cogvlm" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RopeEmbeddingUtils.create_fake_weight">
|
||
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">create_fake_weight</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim:</span> <span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dtype=<class</span> <span class="pre">'numpy.float16'></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RopeEmbeddingUtils.create_fake_weight"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RopeEmbeddingUtils.create_fake_weight" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions">
|
||
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">create_sinusoidal_positions</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">num_pos:</span> <span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim:</span> <span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">theta:</span> <span class="pre">float</span> <span class="pre">=</span> <span class="pre">10000.0</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dtype=<class</span> <span class="pre">'numpy.float32'></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RopeEmbeddingUtils.create_sinusoidal_positions"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_for_attention_plugin">
|
||
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">create_sinusoidal_positions_for_attention_plugin</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">num_pos:</span> <span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim:</span> <span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">theta:</span> <span class="pre">float</span> <span class="pre">=</span> <span class="pre">10000.0</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">scale:</span> <span class="pre">float</span> <span class="pre">=</span> <span class="pre">1.0</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">scale_type:</span> <span class="pre">~tensorrt_llm.functional.RotaryScalingType</span> <span class="pre">=</span> <span class="pre">RotaryScalingType.none</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">rope_scaling_config:</span> <span class="pre">dict</span> <span class="pre">=</span> <span class="pre">None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dtype=<class</span> <span class="pre">'numpy.float32'></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RopeEmbeddingUtils.create_sinusoidal_positions_for_attention_plugin"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_for_attention_plugin" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_for_cogvlm_attention_plugin">
|
||
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">create_sinusoidal_positions_for_cogvlm_attention_plugin</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">num_pos:</span> <span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim:</span> <span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">theta:</span> <span class="pre">float</span> <span class="pre">=</span> <span class="pre">10000.0</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">scale:</span> <span class="pre">float</span> <span class="pre">=</span> <span class="pre">1.0</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">scale_type:</span> <span class="pre">~tensorrt_llm.functional.RotaryScalingType</span> <span class="pre">=</span> <span class="pre">RotaryScalingType.none</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">vision_start:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">1</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">vision_length:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">1225</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dtype=<class</span> <span class="pre">'numpy.float32'></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RopeEmbeddingUtils.create_sinusoidal_positions_for_cogvlm_attention_plugin"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_for_cogvlm_attention_plugin" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_long_rope">
|
||
<span class="sig-name descname"><span class="pre">create_sinusoidal_positions_long_rope</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">num_orig_pos:</span> <span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim:</span> <span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">theta:</span> <span class="pre">float</span> <span class="pre">=</span> <span class="pre">10000.0</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">scaling_short_factors:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">=</span> <span class="pre">1.0</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">scaling_long_factors:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">=</span> <span class="pre">1.0</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">short_mscale=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">long_mscale=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dtype=<class</span> <span class="pre">'numpy.float32'></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RopeEmbeddingUtils.create_sinusoidal_positions_long_rope"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_long_rope" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_yarn">
|
||
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">create_sinusoidal_positions_yarn</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">num_pos:</span> <span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim:</span> <span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">base:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">10000</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">scaling_factor:</span> <span class="pre">float</span> <span class="pre">=</span> <span class="pre">1.0</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">original_max_position_embeddings:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">4096</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">beta_fast:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">32</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">beta_slow:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">1</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">mscale:</span> <span class="pre">float</span> <span class="pre">=</span> <span class="pre">1.0</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">mscale_all_dim:</span> <span class="pre">float</span> <span class="pre">=</span> <span class="pre">1.0</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dtype=<class</span> <span class="pre">'numpy.float32'></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RopeEmbeddingUtils.create_sinusoidal_positions_yarn"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_yarn" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RopeEmbeddingUtils.rotate_every_two">
|
||
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">rotate_every_two</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">tensor</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RopeEmbeddingUtils.rotate_every_two"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RopeEmbeddingUtils.rotate_every_two" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RopeEmbeddingUtils.rotate_half">
|
||
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">rotate_half</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">tensor</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RopeEmbeddingUtils.rotate_half"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RopeEmbeddingUtils.rotate_half" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
</dd></dl>
|
||
|
||
<dl class="py class">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RotaryScalingType">
|
||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">RotaryScalingType</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">value</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">names=<not</span> <span class="pre">given></span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*values</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">module=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">qualname=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">type=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">start=1</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">boundary=None</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RotaryScalingType"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RotaryScalingType" title="Link to this definition">#</a></dt>
|
||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">IntEnum</span></code></p>
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RotaryScalingType.dynamic">
|
||
<span class="sig-name descname"><span class="pre">dynamic</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">2</span></em><a class="headerlink" href="#tensorrt_llm.functional.RotaryScalingType.dynamic" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py method">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RotaryScalingType.from_string">
|
||
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">from_string</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">s</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#RotaryScalingType.from_string"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.RotaryScalingType.from_string" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RotaryScalingType.linear">
|
||
<span class="sig-name descname"><span class="pre">linear</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1</span></em><a class="headerlink" href="#tensorrt_llm.functional.RotaryScalingType.linear" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RotaryScalingType.llama3">
|
||
<span class="sig-name descname"><span class="pre">llama3</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">4</span></em><a class="headerlink" href="#tensorrt_llm.functional.RotaryScalingType.llama3" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RotaryScalingType.longrope">
|
||
<span class="sig-name descname"><span class="pre">longrope</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">3</span></em><a class="headerlink" href="#tensorrt_llm.functional.RotaryScalingType.longrope" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RotaryScalingType.mrope">
|
||
<span class="sig-name descname"><span class="pre">mrope</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">6</span></em><a class="headerlink" href="#tensorrt_llm.functional.RotaryScalingType.mrope" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RotaryScalingType.none">
|
||
<span class="sig-name descname"><span class="pre">none</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">0</span></em><a class="headerlink" href="#tensorrt_llm.functional.RotaryScalingType.none" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.RotaryScalingType.yarn">
|
||
<span class="sig-name descname"><span class="pre">yarn</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">5</span></em><a class="headerlink" href="#tensorrt_llm.functional.RotaryScalingType.yarn" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
</dd></dl>
|
||
|
||
<dl class="py class">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.SideStreamIDType">
|
||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">SideStreamIDType</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">value</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">names=<not</span> <span class="pre">given></span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*values</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">module=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">qualname=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">type=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">start=1</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">boundary=None</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#SideStreamIDType"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.SideStreamIDType" title="Link to this definition">#</a></dt>
|
||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">IntEnum</span></code></p>
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.SideStreamIDType.disable">
|
||
<span class="sig-name descname"><span class="pre">disable</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">0</span></em><a class="headerlink" href="#tensorrt_llm.functional.SideStreamIDType.disable" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.SideStreamIDType.moe">
|
||
<span class="sig-name descname"><span class="pre">moe</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1</span></em><a class="headerlink" href="#tensorrt_llm.functional.SideStreamIDType.moe" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
</dd></dl>
|
||
|
||
<dl class="py class">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.SliceInputType">
|
||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">SliceInputType</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">value</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">names=<not</span> <span class="pre">given></span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*values</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">module=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">qualname=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">type=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">start=1</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">boundary=None</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#SliceInputType"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.SliceInputType" title="Link to this definition">#</a></dt>
|
||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">IntEnum</span></code></p>
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.SliceInputType.axes">
|
||
<span class="sig-name descname"><span class="pre">axes</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">5</span></em><a class="headerlink" href="#tensorrt_llm.functional.SliceInputType.axes" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.SliceInputType.data">
|
||
<span class="sig-name descname"><span class="pre">data</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">0</span></em><a class="headerlink" href="#tensorrt_llm.functional.SliceInputType.data" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.SliceInputType.fill_value">
|
||
<span class="sig-name descname"><span class="pre">fill_value</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">4</span></em><a class="headerlink" href="#tensorrt_llm.functional.SliceInputType.fill_value" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.SliceInputType.size">
|
||
<span class="sig-name descname"><span class="pre">size</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">2</span></em><a class="headerlink" href="#tensorrt_llm.functional.SliceInputType.size" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.SliceInputType.start">
|
||
<span class="sig-name descname"><span class="pre">start</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1</span></em><a class="headerlink" href="#tensorrt_llm.functional.SliceInputType.start" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py attribute">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.SliceInputType.stride">
|
||
<span class="sig-name descname"><span class="pre">stride</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">3</span></em><a class="headerlink" href="#tensorrt_llm.functional.SliceInputType.stride" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
</dd></dl>
|
||
|
||
<dl class="py class">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor">
|
||
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">Tensor</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">name=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dtype=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">shape=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim_range=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">is_network_input=True</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">location=<TensorLocation.DEVICE:</span> <span class="pre">0></span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">network=None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">trt_tensor=None</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor" title="Link to this definition">#</a></dt>
|
||
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
|
||
<p>The class to represent dense tensors.</p>
|
||
<p>A dense tensor is named, has a shape and contains typed elements. Each
|
||
dimension of a tensor can either be static or dynamic. Static dimensions
|
||
are known at engine compilation by TensorRT. Dynamic dimensions can take
|
||
values determined at runtime. The tensor can be located on the host (CPU)
|
||
or the device (GPU).</p>
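<p>A minimal sketch of declaring a network input tensor; the builder scaffolding around the class is shown only for illustration (in real code the network is usually created by the model-building flow):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import tensorrt as trt
import tensorrt_llm
from tensorrt_llm.functional import Tensor

builder = tensorrt_llm.Builder()
network = builder.create_network()
with tensorrt_llm.net_guard(network):
    # Declare a 2x8 float32 network input; use -1 in 'shape' (together
    # with 'dim_range') to declare a dynamic dimension instead.
    x = Tensor(name='x', dtype=trt.float32, shape=[2, 8])
</pre></div></div>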
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.abs">
<span class="sig-name descname"><span class="pre">abs</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.abs"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.abs" title="Link to this definition">#</a></dt>
<dd><p>See functional.abs.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.cast">
<span class="sig-name descname"><span class="pre">cast</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dtype</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.cast"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.cast" title="Link to this definition">#</a></dt>
<dd><p>See functional.cast.</p>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.dtype">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">dtype</span></span><a class="headerlink" href="#tensorrt_llm.functional.Tensor.dtype" title="Link to this definition">#</a></dt>
<dd><p>The type of the elements in the tensor.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.flatten">
<span class="sig-name descname"><span class="pre">flatten</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">start_dim</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">end_dim</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">-1</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.flatten"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.flatten" title="Link to this definition">#</a></dt>
<dd><p>See functional.flatten.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.get_parent">
<span class="sig-name descname"><span class="pre">get_parent</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.get_parent"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.get_parent" title="Link to this definition">#</a></dt>
<dd><p>Get the layer that produces this tensor.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.get_users">
<span class="sig-name descname"><span class="pre">get_users</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.get_users"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.get_users" title="Link to this definition">#</a></dt>
<dd><p>Get the layers that use this tensor as an input.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.is_dynamic">
<span class="sig-name descname"><span class="pre">is_dynamic</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.is_dynamic"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.is_dynamic" title="Link to this definition">#</a></dt>
<dd><p>If the argument ‘dim’ is None, that function returns a boolean that
indicates if the tensor contains a dynamic dimension (True) or not
(False). In that case, the first dimension is excluded (as it usually
corresponds to the batch size). If the argument is an integer, that
function returns a boolean that indicates if the dimension ‘dim’ is
dynamic (True) or not (False).</p>
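<p>For example (a sketch; <code class="docutils literal notranslate"><span class="pre">x</span></code> is assumed to be a network input declared with <code class="docutils literal notranslate"><span class="pre">shape=[-1,</span> <span class="pre">8]</span></code>, i.e. a dynamic first dimension):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>x.is_dynamic(0)   # True: dimension 0 is dynamic.
x.is_dynamic(1)   # False: dimension 1 is static.
x.is_dynamic()    # False: dimension 0 (batch) is excluded from the check.
</pre></div></div>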
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.is_trt_wrapper">
<span class="sig-name descname"><span class="pre">is_trt_wrapper</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.is_trt_wrapper"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.is_trt_wrapper" title="Link to this definition">#</a></dt>
<dd><p>Check if there is a trt.ITensor member inside, which is required by the
graph rewriter. To differentiate usages, an inheritance hierarchy may
be necessary.</p>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.location">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">location</span></span><a class="headerlink" href="#tensorrt_llm.functional.Tensor.location" title="Link to this definition">#</a></dt>
<dd><p>The physical location of the tensor (on the host or the device).</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.log">
<span class="sig-name descname"><span class="pre">log</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.log"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.log" title="Link to this definition">#</a></dt>
<dd><p>See functional.log.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.mark_output">
<span class="sig-name descname"><span class="pre">mark_output</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">DataType</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.mark_output"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.mark_output" title="Link to this definition">#</a></dt>
<dd><p>Mark a tensor as a network output.</p>
<p>When a tensor is marked as an output, its content can be obtained after
the execution of the TensorRT engine. The user is responsible for
allocating buffers to store the output tensors when preparing the
execution of the TensorRT engine.</p>
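<p>A short sketch of the typical pattern (inside an active network; the names are illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>y = x.abs()            # any computation producing a tensor
y.mark_output('y')     # expose it as an engine output
# Optionally force the output dtype:
# y.mark_output('y_half', 'float16')
</pre></div></div>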
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.max">
<span class="sig-name descname"><span class="pre">max</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dim</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">keepdim</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.max"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.max" title="Link to this definition">#</a></dt>
<dd><p>See functional.max.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.mean">
<span class="sig-name descname"><span class="pre">mean</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dim</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">keepdim</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.mean"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.mean" title="Link to this definition">#</a></dt>
<dd><p>See functional.mean.</p>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.name">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">name</span></span><a class="headerlink" href="#tensorrt_llm.functional.Tensor.name" title="Link to this definition">#</a></dt>
<dd><p>The name of the tensor.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.ndim">
<span class="sig-name descname"><span class="pre">ndim</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.ndim"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.ndim" title="Link to this definition">#</a></dt>
<dd><p>Returns the rank (i.e. the number of dimensions) of the tensor.</p>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.network">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">network</span></span><a class="headerlink" href="#tensorrt_llm.functional.Tensor.network" title="Link to this definition">#</a></dt>
<dd></dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.permute">
<span class="sig-name descname"><span class="pre">permute</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dims</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.permute"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.permute" title="Link to this definition">#</a></dt>
<dd><p>See functional.permute.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.rank">
<span class="sig-name descname"><span class="pre">rank</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.rank"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.rank" title="Link to this definition">#</a></dt>
<dd><p>Returns the rank (i.e. the number of dimensions) of the tensor.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.repeat">
<span class="sig-name descname"><span class="pre">repeat</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">sizes</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.repeat"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.repeat" title="Link to this definition">#</a></dt>
<dd><p>See functional.repeat.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.replace_all_uses_with">
<span class="sig-name descname"><span class="pre">replace_all_uses_with</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">new_tensor</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.replace_all_uses_with"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.replace_all_uses_with" title="Link to this definition">#</a></dt>
<dd><p>Replace all uses of this tensor as an input to consumer layers.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.select">
<span class="sig-name descname"><span class="pre">select</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dim</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">index</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.select"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.select" title="Link to this definition">#</a></dt>
<dd><p>See functional.select.</p>
</dd></dl>

<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.shape">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">shape</span></span><a class="headerlink" href="#tensorrt_llm.functional.Tensor.shape" title="Link to this definition">#</a></dt>
<dd><p>The shape of the tensor.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.size">
<span class="sig-name descname"><span class="pre">size</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.size"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.size" title="Link to this definition">#</a></dt>
<dd><p>Returns the shape of the tensor if the dim parameter is None.
Otherwise, returns the size of the dimension indicated by dim. The
behavior is undefined if dim is negative or exceeds the rank of the
tensor.</p>
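<p>For example (a sketch; <code class="docutils literal notranslate"><span class="pre">x</span></code> is assumed to have shape <code class="docutils literal notranslate"><span class="pre">[2,</span> <span class="pre">8]</span></code>):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>x.size()    # the full shape, [2, 8]
x.size(1)   # the size of dimension 1, i.e. 8
</pre></div></div>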
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.split">
<span class="sig-name descname"><span class="pre">split</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">split_size_or_sections</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.split"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.split" title="Link to this definition">#</a></dt>
<dd><p>See functional.split.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.sqrt">
<span class="sig-name descname"><span class="pre">sqrt</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.sqrt"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.sqrt" title="Link to this definition">#</a></dt>
<dd><p>See functional.sqrt.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.squeeze">
<span class="sig-name descname"><span class="pre">squeeze</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dim</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">zero_is_placeholder</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.squeeze"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.squeeze" title="Link to this definition">#</a></dt>
<dd><p>See functional.squeeze.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.transpose">
<span class="sig-name descname"><span class="pre">transpose</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dim0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dim1</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.transpose"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.transpose" title="Link to this definition">#</a></dt>
<dd><p>See functional.transpose.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.unbind">
<span class="sig-name descname"><span class="pre">unbind</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.unbind"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.unbind" title="Link to this definition">#</a></dt>
<dd><p>See functional.unbind.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.unsqueeze">
<span class="sig-name descname"><span class="pre">unsqueeze</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dim</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.unsqueeze"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.unsqueeze" title="Link to this definition">#</a></dt>
<dd><p>See functional.unsqueeze.</p>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.functional.Tensor.view">
<span class="sig-name descname"><span class="pre">view</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">shape</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">zero_is_placeholder</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#Tensor.view"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.Tensor.view" title="Link to this definition">#</a></dt>
<dd><p>See functional.view.</p>
</dd></dl>

</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.abs">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">abs</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.UnaryOperation</span> <span class="pre">=</span> <span class="pre"><UnaryOperation.ABS:</span> <span class="pre">4></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.abs" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation on a single input.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>round for op=trt.UnaryOperation.ROUND
sqrt for op=trt.UnaryOperation.SQRT
exp for op=trt.UnaryOperation.EXP
sin for op=trt.UnaryOperation.SIN
cos for op=trt.UnaryOperation.COS
abs for op=trt.UnaryOperation.ABS
log for op=trt.UnaryOperation.LOG</p>
</div></blockquote>
<p>It is implemented using the IUnaryLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>op</strong> – trt.UnaryOperation
The unary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
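<p>A sketch of how the unary closures compose (<code class="docutils literal notranslate"><span class="pre">x</span></code> is assumed to be a tensor in an active network):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import tensorrt_llm.functional as F

y = F.sqrt(F.exp(x))   # chains two IUnaryLayer ops: EXP, then SQRT
z = F.abs(x)           # the same mechanism with op=trt.UnaryOperation.ABS
</pre></div></div>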
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.activation">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">activation</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">act_type</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">ActivationType</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#activation"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.activation" title="Link to this definition">#</a></dt>
<dd><p>Add an activation function.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor on which the activation function is applied.</p></li>
<li><p><strong>act_type</strong> – trt.ActivationType
The type of the activation (RELU, TANH, SIGMOID, …).</p></li>
</ul>
</dd>
</dl>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>relu for op=trt.ActivationType.RELU
tanh for op=trt.ActivationType.TANH
sigmoid for op=trt.ActivationType.SIGMOID</p>
</div></blockquote>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>The tensor produced by the activation layer.</p>
</dd>
</dl>
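<p>For example (a sketch; <code class="docutils literal notranslate"><span class="pre">x</span></code> is assumed to be a tensor in an active network):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import tensorrt as trt
import tensorrt_llm.functional as F

y = F.relu(x)                                    # closure for RELU
s = F.activation(x, trt.ActivationType.SIGMOID)  # explicit act_type
</pre></div></div>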
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.add">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">add</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">left:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">right:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ElementWiseOperation</span> <span class="pre">=</span> <span class="pre"><ElementWiseOperation.SUM:</span> <span class="pre">0></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.add" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation with two inputs.</p>
<p>For each input, that function first creates a constant tensor if the input
is an integer or a float. Then, if needed, it expands the smaller tensor to
make sure its rank is the same as the larger one. Then, it performs the
elementwise operation ‘op’.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>add for op=trt.ElementWiseOperation.SUM
sub for op=trt.ElementWiseOperation.SUB
mul for op=trt.ElementWiseOperation.PROD
div for op=trt.ElementWiseOperation.DIV
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
gt for op=trt.ElementWiseOperation.GREATER
lt for op=trt.ElementWiseOperation.LESS
op_and for op=trt.ElementWiseOperation.AND
op_or for op=trt.ElementWiseOperation.OR
eq for op=trt.ElementWiseOperation.EQUAL
minimum for op=trt.ElementWiseOperation.MIN
maximum for op=trt.ElementWiseOperation.MAX
pow for op=trt.ElementWiseOperation.POW</p>
</div></blockquote>
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>left</strong> – Union[Tensor, int, float]
The first input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>right</strong> – Union[Tensor, int, float]
The second input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>op</strong> – trt.ElementWiseOperation
The binary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
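<p>A sketch of the elementwise closures and the equivalent Python operators on <code class="docutils literal notranslate"><span class="pre">Tensor</span></code> (operands assumed to live in an active network):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import tensorrt_llm.functional as F

y = F.add(x, 1.0)     # 1.0 is promoted to a constant tensor
z = x + 1.0           # Tensor.__add__ routes to the same closure
m = F.maximum(x, z)   # op=trt.ElementWiseOperation.MAX
</pre></div></div>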
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.allgather">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">allgather</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">tensor</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">group</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">gather_dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#allgather"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.allgather" title="Link to this definition">#</a></dt>
<dd><p>Add an operation that performs a collective all-gather.</p>
<p>Let’s define ‘group_size’ as the length of the ‘group’ list. That function
creates a layer to gather ‘group_size’ tensors distributed
amongst the ‘group_size’ participating ranks (one GPU per rank).</p>
<p>The list ‘group’ contains the identifiers of the ranks participating in
the collective operation.</p>
<p>Note that ‘group’ here can be either a TP group or a PP group, because
allgather communication is not limited to a specific split pattern.
Therefore ‘group_size’ does not need to equal the MPI ‘world_size’.</p>
<p>The tensors in the different ranks must be 1D tensors (or views) and the
output tensor will have that same shape.</p>
<p>Given ‘section_size = input.shape[0] / group_size’, each rank
contributes a section of its input tensor that corresponds to
‘rank*section_size:(rank+1)*section_size’.</p>
<p>That operation is implemented using a plugin that wraps the NCCL all-gather
collective operation. See
<a class="reference external" href="https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/collectives.html#allgather">https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/collectives.html#allgather</a>
for details.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>tensor</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>group</strong> – List[int]
The ranks participating in the all-gather operation.</p></li>
<li><p><strong>gather_dim</strong> – int = 0
Gather along the given dimension. By default 0, i.e. the input is
treated as a 1D tensor.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by that layer.</p>
</dd>
</dl>
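<p>A sketch of a typical tensor-parallel use (the <code class="docutils literal notranslate"><span class="pre">Mapping</span></code> setup and the input tensor <code class="docutils literal notranslate"><span class="pre">partial</span></code> are assumptions for illustration):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import tensorrt_llm.functional as F
from tensorrt_llm import Mapping

# Two-way tensor parallelism: ranks 0 and 1 each hold a partial tensor.
mapping = Mapping(world_size=2, rank=0, tp_size=2)
gathered = F.allgather(partial, group=mapping.tp_group, gather_dim=0)
</pre></div></div>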
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.allreduce">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">allreduce</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="pre">tensor:</span> <span class="pre">~tensorrt_llm.functional.Tensor,</span> <span class="pre">group:</span> <span class="pre">~typing.List[int],</span> <span class="pre">all_reduce_params:</span> <span class="pre">~tensorrt_llm.functional.AllReduceParams</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre"><tensorrt_llm.functional.AllReduceParams</span> <span class="pre">object></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#allreduce"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.allreduce" title="Link to this definition">#</a></dt>
<dd><p>Add an operation that performs a collective all-reduce.</p>
<p>Let’s define ‘world_size’ as the length of the ‘group’ list. That function
creates a layer to compute the sum of ‘world_size’ tensors distributed
amongst the ‘world_size’ participating ranks (one GPU per rank).</p>
<p>The list ‘group’ contains the identifiers of the ranks participating in
the collective operation.</p>
<p>The tensors in the different ranks must be 1D tensors (or views) and the output
tensor will have that same shape. The output tensor will be replicated on
the ‘world_size’ ranks.</p>
<p>That operation is implemented using a plugin that wraps the NCCL all-reduce
collective operation. See
<a class="reference external" href="https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/collectives.html#allreduce">https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/collectives.html#allreduce</a>
for details.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>tensor</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>group</strong> – List[int]
The ranks participating in the all-reduce operation.</p></li>
<li><p><strong>all_reduce_params</strong> – AllReduceParams
Optional parameters of the all-reduce operation, including the strategy:
NCCL delegates the all-reduce to NCCL, while ONESHOT and TWOSHOT are
custom latency-optimal algorithms; AUTO chooses amongst the three based
on a message-size heuristic.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by that layer.</p>
</dd>
</dl>
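<p>For example (a sketch; <code class="docutils literal notranslate"><span class="pre">partial</span></code> and <code class="docutils literal notranslate"><span class="pre">mapping</span></code> as in the allgather sketch above):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import tensorrt_llm.functional as F

# Sum the partial results held by the tensor-parallel ranks; with the
# default all_reduce_params, the strategy is selected automatically.
summed = F.allreduce(partial, group=mapping.tp_group)
</pre></div></div>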
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.arange">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">arange</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">start</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">end</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#arange"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.arange" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to fill a 1D tensor.</p>
<p>The tensor is filled with the values between start and end with a step of 1
between the different elements. In pseudo-code, it corresponds to a tensor
populated with the values:</p>
<blockquote>
<div><p>output = Tensor([dtype(ii) for ii in range(start, end, 1)])</p>
</div></blockquote>
<p>For example, a call to arange(3, 6, ‘int32’) will add an operation to the
TensorRT graph that will produce [3, 4, 5] when executed. The call to
arange(2, 5, ‘float32’) will add a layer to generate [2.0, 3.0, 4.0].</p>
<p>This operation is implemented using a tensorrt.IFillLayer in
trt.FillOperation.LINSPACE mode.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>start</strong> – Union[Tensor, int]
The starting point of the range.</p></li>
<li><p><strong>end</strong> – Union[Tensor, int]
The end point of the range.</p></li>
<li><p><strong>dtype</strong> – str
The type of the elements. See _str_to_trt_dtype_dict in _utils.py
for a list of supported types and type names.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the fill layer. It is a 1D tensor containing
<cite>end-start</cite> elements of type <cite>dtype</cite>.</p>
</dd>
</dl>
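<p>A sketch combining a constant bound and a runtime bound (<code class="docutils literal notranslate"><span class="pre">x</span></code> is assumed to be a tensor in an active network; <code class="docutils literal notranslate"><span class="pre">F.shape</span></code> extracts a runtime dimension):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import tensorrt_llm.functional as F

pos = F.arange(0, 8, 'int32')              # constant bounds: [0, 1, ..., 7]
idx = F.arange(0, F.shape(x, 0), 'int32')  # one element per row of x
</pre></div></div>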
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.argmax">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">argmax</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">keepdim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#argmax"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.argmax" title="Link to this definition">#</a></dt>
<dd><p>Add an argmax operation.</p>
<p>As explained in the ONNX documentation,</p>
<blockquote>
<div><p><a class="github reference external" href="https://github.com/onnx/onnx/blob/main/docs/Operators.md#argmax">onnx/onnx</a></p>
</div></blockquote>
<p>that function creates a layer computing the indices of the max elements of
the input tensor along the provided dim. The resulting tensor
has the same rank as the input if keepdim is True. If keepdim is False,
then the resulting tensor has the reduced dimension pruned.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>dim</strong> – int
The dimension in which to compute the argmax indices.</p></li>
<li><p><strong>keepdim</strong> – bool
Do we keep the dimension along which the reduction is performed?
Yes, if set to True; no otherwise.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this argmax operation.</p>
</dd>
</dl>
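<p>For example (a sketch; <code class="docutils literal notranslate"><span class="pre">logits</span></code> is assumed to have shape <code class="docutils literal notranslate"><span class="pre">[batch,</span> <span class="pre">vocab]</span></code>):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import tensorrt_llm.functional as F

best = F.argmax(logits, dim=1)                  # shape [batch]
best_k = F.argmax(logits, dim=1, keepdim=True)  # shape [batch, 1]
</pre></div></div>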
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.assertion">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">assertion</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">condition</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">message</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">''</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#assertion"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.assertion" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.avg_pool2d">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">avg_pool2d</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">kernel_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">stride</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">padding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">(0,</span> <span class="pre">0)</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">ceil_mode</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">count_include_pad</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#avg_pool2d"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.avg_pool2d" title="Link to this definition">#</a></dt>
|
||
<dd></dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.bert_attention">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">bert_attention</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">tensor</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input_lengths</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">num_heads</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">head_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">q_scaling</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">relative_attention</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">relative_attention_bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_distance</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_input_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">sage_attn</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">sage_attn_q_block_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">sage_attn_k_block_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">sage_attn_v_block_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#bert_attention"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.bert_attention" title="Link to this definition">#</a></dt>
<dd><p>Add an operation that performs the multi-head attention in BERT.</p>
<p>The multi-head attention (MHA) is the sequence of a batched matmul, a
softmax and a batched matmul as described in
<a class="reference external" href="https://arxiv.org/abs/1706.03762">https://arxiv.org/abs/1706.03762</a>. That function adds an operation that
performs those computations using a single GPU kernel.</p>
<p>The input tensor contains the Q, K and V elements. It is a 2D tensor and
its shape is ‘[sum_of_tokens, 3*hidden_dim]’ where the ‘sum_of_tokens’ is
the sum of the sequence lengths in the batch.</p>
<p>In MHA, the output of the Q*K^T product is scaled by a constant value that
is computed as:</p>
<blockquote>
<div><p>1.f / (q_scaling * sqrt(head_size)).</p>
</div></blockquote>
<p>That constant is computed from the ‘q_scaling’ argument of that function.</p>
<p>That layer is implemented using a plugin (see bertAttentionPlugin).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>tensor</strong> – Tensor
The QKV input tensor.</p></li>
<li><p><strong>input_lengths</strong> – Tensor
The length of each sequence. It is a 1D tensor of size ‘batch_size’.</p></li>
<li><p><strong>num_heads</strong> – int
The number of heads.</p></li>
<li><p><strong>head_size</strong> – int
The size of each head.</p></li>
<li><p><strong>q_scaling</strong> – float
The factor to compute the scaling factor to scale the output of the
‘Q*K^T’ product.</p></li>
<li><p><strong>relative_attention</strong> – bool = False
Whether to enable relative attention.</p></li>
<li><p><strong>relative_attention_bias</strong> – Tensor = None
The relative attention bias [num_heads, max_seq_len, max_seq_len], or the relative attention embedding table for implicit mode, [num_heads, num_buckets].</p></li>
<li><p><strong>max_distance</strong> – int = 0
The maximum distance of relative position in attention, for implicit mode.
The default value is 0, meaning to use the regular mode of relative attention bias.
Implicit mode is only enabled when a positive max_distance value is passed in.
See relative attention bias in docs/source/advanced/gpt-attention.md.</p></li>
<li><p><strong>max_input_length</strong> – Tensor = None
The maximum input sequence length, represented by the shape of this tensor. Required when remove_input_padding is enabled, to pre-define the plugin workspace size.</p></li>
<li><p><strong>sage_attn</strong> – bool = False
SageAttention is an 8-bit implementation of the attention kernel. Its q, k, v inputs and its output are 16-bit. It performs dynamic quantization of the q, k and v
tensors before each attention call. <a class="github reference external" href="https://github.com/thu-ml/SageAttention">thu-ml/SageAttention</a></p></li>
<li><p><strong>sage_attn_q_block_size</strong> – int = 0
The dynamic quantization block size along the sequence dimension of the q tensor. Each quantization block shares one scale.</p></li>
<li><p><strong>sage_attn_k_block_size</strong> – int = 0
The dynamic quantization block size along the sequence dimension of the k tensor. Each quantization block shares one scale.</p></li>
<li><p><strong>sage_attn_v_block_size</strong> – int = 0
The dynamic quantization block size along the sequence dimension of the v tensor. Each quantization block shares one scale.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by that layer.</p>
</dd>
</dl>
</dd></dl>
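<p>A small, pure-Python illustration of the two quantities the description above defines, the packed QKV shape and the softmax scale (the numbers are made up for the example):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># The packed QKV shape and the Q*K^T scale from the docstring above.
import math

num_heads, head_size = 12, 64
hidden_dim = num_heads * head_size            # 768
seq_lens = [7, 3, 12]                         # one entry per sequence
sum_of_tokens = sum(seq_lens)                 # 22
qkv_shape = (sum_of_tokens, 3 * hidden_dim)   # packed Q, K and V

q_scaling = 1.0
scale = 1.0 / (q_scaling * math.sqrt(head_size))
print(qkv_shape, scale)                       # (22, 2304) 0.125
</pre></div></div>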
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.broadcast_helper">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">broadcast_helper</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">left</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">float</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#broadcast_helper"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.broadcast_helper" title="Link to this definition">#</a></dt>
<dd><p>Helper function to perform a broadcast.</p>
<p>For each input, that function first creates a constant tensor if the input
is an integer or a float. Then, if needed, it expands the smaller tensor to
make sure its rank is the same as the larger one.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>left</strong> – Union[Tensor, int, float]
The first input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>right</strong> – Union[Tensor, int, float]
The second input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A pair of tensors of the same rank.</p>
</dd>
</dl>
</dd></dl>
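<p>A NumPy analogue of the behaviour described above (an analogue only, not the TensorRT implementation): scalars become constant tensors, then the lower-rank operand gains leading unit dimensions until the ranks match:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># NumPy analogue of broadcast_helper; assumes rank expansion prepends
# unit dimensions, which is the usual broadcasting convention.
import numpy as np

def broadcast_helper_analogue(left, right):
    left = np.asarray(left, dtype=np.float32)    # a scalar becomes a 0-D constant
    right = np.asarray(right, dtype=np.float32)
    for _ in range(right.ndim - left.ndim):      # empty range if left is larger
        left = left.reshape((1,) + left.shape)
    for _ in range(left.ndim - right.ndim):
        right = right.reshape((1,) + right.shape)
    return left, right

a, b = broadcast_helper_analogue(np.ones((2, 3)), 4.0)
print(a.shape, b.shape)  # (2, 3) (1, 1)
</pre></div></div>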
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.cast">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">cast</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">DataType</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#cast"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.cast" title="Link to this definition">#</a></dt>
<dd><p>Add a cast operation.</p>
<p>For an input tensor of type INT8, this function sets the dynamic range of
the input to [-127, 127] for automatic dequantization. For a cast into
INT8, that function sets the dynamic range of the output to [-127, 127] for
automatic quantization.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor on which the cast is applied.</p></li>
<li><p><strong>dtype</strong> – str or trt.DataType
The data type of the output tensor after the cast. When ‘dtype’ is
provided as a string, it must be one of the valid type names.
See _str_to_trt_dtype_dict in _utils.py for the list of supported
types and type names.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the inserted layer.</p>
</dd>
</dl>
</dd></dl>
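<p>A minimal sketch of a cast inside a network definition (the builder boilerplate is shown only for context):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Cast a float32 constant to float16; 'float16' must be one of the
# valid type names mentioned above.
import numpy as np
import tensorrt_llm
from tensorrt_llm.network import net_guard
from tensorrt_llm.functional import cast, constant

builder = tensorrt_llm.Builder()
network = builder.create_network()
with net_guard(network):
    x = constant(np.arange(6, dtype=np.float32).reshape(2, 3))
    y = cast(x, 'float16')
</pre></div></div>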
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.categorical_sample">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">categorical_sample</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">probs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">rand_data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#categorical_sample"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.categorical_sample" title="Link to this definition">#</a></dt>
<dd><p>This is a sampling operation, an equivalent of torch.distributions.Categorical.sample():
given a probability distribution tensor, it samples an index from that distribution.
See: <a class="reference external" href="https://pytorch.org/docs/stable/distributions.html#torch.distributions.categorical.Categorical.sample">https://pytorch.org/docs/stable/distributions.html#torch.distributions.categorical.Categorical.sample</a>
NOTE: This assumes that the given probabilities are <strong>not</strong> normalized.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>probs</strong> – Tensor
A 1-D floating point tensor representing the probability distribution.</p></li>
<li><p><strong>rand_data</strong> – Tensor (optional)
A random tensor of the same shape as the <cite>probs</cite> tensor.
If not provided, this function adds a rand() op to generate it and uses it for sampling.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A tensor containing a single index of the <cite>probs</cite> tensor representing the sample.</p>
</dd>
</dl>
</dd></dl>
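<p>A NumPy analogue of the sampling rule (an analogue only, the plugin logic may differ): because the probabilities are not normalized, the uniform draw is scaled by the total mass before an inverse-CDF lookup:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># NumPy analogue of categorical_sample with unnormalized probabilities.
import numpy as np

def categorical_sample_analogue(probs, rand_value):
    cdf = np.cumsum(probs)               # unnormalized cumulative mass
    threshold = rand_value * cdf[-1]     # scale the uniform draw by the total
    return int(np.searchsorted(cdf, threshold))

probs = np.array([1.0, 3.0, 6.0])        # sums to 10, not 1
print(categorical_sample_analogue(probs, 0.35))  # 0.35 * 10 = 3.5, index 1
</pre></div></div>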
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.chunk">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">chunk</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">tensor</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">chunks</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#chunk"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.chunk" title="Link to this definition">#</a></dt>
<dd><p>Add an operation that splits a tensor into sub-tensors.</p>
<p>This operation creates a list of tensors that are obtained from the input
tensor by chunking it along the dimension ‘dim’. It produces ‘chunks’
sub-tensors.</p>
<p>That operation is only defined for static tensors (no dynamic dimension)
and the size of the tensor in the dimension ‘dim’ must be a multiple of
‘chunks’: ‘input.shape[dim] % chunks == 0’.</p>
<p>It maps to ‘split’ with ‘split_size = input.shape[dim] / chunks’.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>tensor</strong> – Tensor
The input tensor to slice.</p></li>
<li><p><strong>chunks</strong> – int
The number of slices to split the input tensor into.</p></li>
<li><p><strong>dim</strong> – int
The dimension of the tensor to slice.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The list of sub-tensors produced by the split operation.</p>
</dd>
</dl>
</dd></dl>
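<p>A quick, pure-Python check of the constraint and the mapping to ‘split’ described above:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># chunk requires input.shape[dim] % chunks == 0 and behaves like
# split with split_size = input.shape[dim] // chunks.
shape, chunks, dim = (4, 6), 3, 1
assert shape[dim] % chunks == 0
split_size = shape[dim] // chunks
print(split_size)  # 2, i.e. three sub-tensors of shape [4, 2]
</pre></div></div>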
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.clip">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">clip</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">alpha</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">beta</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#clip"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.clip" title="Link to this definition">#</a></dt>
<dd><p>Add a CLIP operation that sets the range to [alpha, beta].</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor on which the activation function is applied.</p></li>
<li><p><strong>alpha</strong> – float
The lower bound of the CLIP function.</p></li>
<li><p><strong>beta</strong> – float
The upper bound of the CLIP function.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the activation layer.</p>
</dd>
</dl>
</dd></dl>
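<p>A NumPy analogue of the CLIP activation: values are limited to the closed range [alpha, beta]:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># NumPy analogue of clip(input, alpha, beta).
import numpy as np

x = np.array([-2.0, 0.5, 3.0])
print(np.clip(x, 0.0, 1.0))  # [0.  0.5 1. ]
</pre></div></div>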
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.concat">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">concat</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">inputs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#concat"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.concat" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to concatenate tensors.</p>
<p>The function creates an operation that concatenates the tensors from the
sequence ‘inputs’. The concatenation is done along the dimension ‘dim’.</p>
<p>All the tensors in ‘inputs’ must have the same shape except for the
dimension ‘dim’.</p>
<blockquote>
<div><dl class="simple">
<dt>for ii in range(inputs[0].rank()):</dt><dd><p>assert (ii == dim) or all(inp.shape[ii] == inputs[0].shape[ii] for inp in inputs)</p>
</dd>
</dl>
</div></blockquote>
<p>The shape of the output tensor is defined as:</p>
<blockquote>
<div><dl>
<dt>for ii in range(inputs[0].rank()):</dt><dd><p># Same size as all the inputs in dimension ii != dim.
output.shape[ii] = inputs[0].shape[ii]</p>
<p># Sum of the sizes in the different inputs in dimension ‘dim’.
if ii == dim:</p>
<blockquote>
<div><dl class="simple">
<dt>for jj in range(1, len(inputs)):</dt><dd><p>output.shape[ii] += inputs[jj].shape[ii]</p>
</dd>
</dl>
</div></blockquote>
</dd>
</dl>
</div></blockquote>
<p>For example, given a sequence of two 2D tensors [[0, 1], [2, 3]] and
[[4, 5], [6, 7]] both of shape [2, 2],</p>
<blockquote>
<div><p>concat(inputs, 0)</p>
</div></blockquote>
<p>will produce [[0, 1], [2, 3], [4, 5], [6, 7]] of shape [4, 2] and</p>
<blockquote>
<div><p>concat(inputs, 1)</p>
</div></blockquote>
<p>will produce [[0, 1, 4, 5], [2, 3, 6, 7]] of shape [2, 4].</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>inputs</strong> – Sequence[Union[Tensor, int]]
The sequence of tensors to concatenate. For integers, that function
creates constant tensors.</p></li>
<li><p><strong>dim</strong> – int
The dimension in which the concatenation is performed.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A tensor that contains the concatenation of the tensors.</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.constant">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">constant</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">ndarray</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">ndarray</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">as_dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataType</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">as_shape</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#constant"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.constant" title="Link to this definition">#</a></dt>
<dd><p>Add a constant layer.</p>
<p>TensorRT graphs encapsulate constant values in the form of constant layers
(tensorrt.IConstantLayer). That function creates such a layer from a NumPy
array of values. After compilation of the network by TensorRT, those
weights are stored in the serialized TensorRT engine.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>ndarray</strong> – numpy.ndarray
The array of values (weights) encapsulated by this constant layer.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the inserted layer.</p>
</dd>
</dl>
</dd></dl>
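<p>A minimal sketch of embedding weights as a constant layer (builder boilerplate shown for context):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># The NumPy array is captured as a tensorrt.IConstantLayer and ends up
# serialized inside the engine after compilation.
import numpy as np
import tensorrt_llm
from tensorrt_llm.network import net_guard
from tensorrt_llm.functional import constant

builder = tensorrt_llm.Builder()
network = builder.create_network()
with net_guard(network):
    w = constant(np.zeros((16, 16), dtype=np.float32))
</pre></div></div>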
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.constant_to_tensor_">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">constant_to_tensor_</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">bool</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataType</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">to_array</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#constant_to_tensor_"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.constant_to_tensor_" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.constants_to_tensors_">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">constants_to_tensors_</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">inputs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">float</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="p"><span class="pre">...</span></span><span class="p"><span class="pre">]</span></span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#constants_to_tensors_"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.constants_to_tensors_" title="Link to this definition">#</a></dt>
<dd><p>Helper function to create tensors from multiple inputs.</p>
<p>For each input, that function first creates a constant tensor if the input
is an integer or a float. Then, if any input is int64, it upcasts the other
integer inputs to int64.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>inputs</strong> – Tuple[Union[Tensor, int, float], …]
The inputs to create tensors from.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A tuple of tensors.</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.conv1d">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">conv1d</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">stride</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">padding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dilation</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">groups</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#conv1d"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.conv1d" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.conv2d">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">conv2d</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">stride</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">(1,</span> <span class="pre">1)</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">padding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">(0,</span> <span class="pre">0)</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dilation</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">(1,</span> <span class="pre">1)</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">groups</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">pre_padding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">post_padding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#conv2d"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.conv2d" title="Link to this definition">#</a></dt>
<dd></dd></dl>
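<p>A hedged sketch of conv2d based on the signature above; the NCHW input and OIHW weight layouts are assumptions borrowed from the usual convolution convention, not stated on this page:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Hedged sketch: the layouts (NCHW input, OIHW weight) are assumptions.
import numpy as np
import tensorrt_llm
from tensorrt_llm.network import net_guard
from tensorrt_llm.functional import constant, conv2d

builder = tensorrt_llm.Builder()
network = builder.create_network()
with net_guard(network):
    x = constant(np.ones((1, 3, 8, 8), dtype=np.float32))   # assumed NCHW
    w = constant(np.ones((4, 3, 3, 3), dtype=np.float32))   # assumed OIHW
    y = conv2d(x, w, stride=(1, 1), padding=(1, 1))
</pre></div></div>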
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.conv3d">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">conv3d</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">stride</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">(1,</span> <span class="pre">1,</span> <span class="pre">1)</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">padding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">(0,</span> <span class="pre">0,</span> <span class="pre">0)</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dilation</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">(1,</span> <span class="pre">1,</span> <span class="pre">1)</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">groups</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#conv3d"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.conv3d" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.conv_transpose2d">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">conv_transpose2d</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">stride</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">(1,</span> <span class="pre">1)</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">padding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">(0,</span> <span class="pre">0)</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">output_padding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">(0,</span> <span class="pre">0)</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dilation</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">(1,</span> <span class="pre">1)</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">groups</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#conv_transpose2d"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.conv_transpose2d" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.cos">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">cos</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.UnaryOperation</span> <span class="pre">=</span> <span class="pre"><UnaryOperation.COS:</span> <span class="pre">7></span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.cos" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation on a single input.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>round for op=trt.UnaryOperation.ROUND
sqrt for op=trt.UnaryOperation.SQRT
exp for op=trt.UnaryOperation.EXP
sin for op=trt.UnaryOperation.SIN
cos for op=trt.UnaryOperation.COS
abs for op=trt.UnaryOperation.ABS
log for op=trt.UnaryOperation.LOG</p>
</div></blockquote>
<p>It is implemented using the IUnaryLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>op</strong> – trt.UnaryOperation
The unary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
</dd></dl>
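<p>A minimal sketch using a few of the closures listed above; each call adds one IUnaryLayer to the network under construction:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># cos, sin and sqrt are thin wrappers over the same unary helper.
import numpy as np
import tensorrt_llm
from tensorrt_llm.network import net_guard
from tensorrt_llm.functional import constant, cos, sin, sqrt

builder = tensorrt_llm.Builder()
network = builder.create_network()
with net_guard(network):
    x = constant(np.linspace(0.0, 1.0, 8).astype(np.float32))
    y = cos(x)
    z = sqrt(sin(x))
</pre></div></div>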
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.cp_split_plugin">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">cp_split_plugin</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">host_request_types</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">host_context_lengths</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">cp_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">cp_rank</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#cp_split_plugin"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.cp_split_plugin" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an operation to perform splitting for context parallelism.</p>
|
||
<p>This operation split the input_ids into cp_size chunks, and return the cp_rank-th
|
||
chunk.
|
||
When the seqlen % cp_size != 0, the chunk sizes of each rank would be
|
||
[seqlen // cp_size, seqlen // cp_size, …, seqlen - (seqlen // cp_size) * cp_size]</p>
|
||
<p>It inserts a IPluginV3Layer.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor contains the indices to split.</p></li>
|
||
<li><p><strong>host_request_types</strong> – Tensor = None (On CPU)
|
||
The tensor on the host that indicates if a request is in context or
|
||
generation phase. Its shape is [batch_size]. See Inflight Batching
|
||
in docs/gpt_attention.md,</p></li>
|
||
<li><p><strong>host_context_lengths</strong> – Tensor = None (On CPU)
|
||
A host tensor that contains the lengths of the different inputs</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The output split tensor.
|
||
The length of the output split tensor.
|
||
The index for rebuilding the sequence</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
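<p>A plain-Python sketch of the chunking rule described above (illustrative only; the helper name is hypothetical, and this is not the plugin implementation):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>def cp_chunk_sizes(seqlen: int, cp_size: int) -> list:
    # Every rank but the last gets seqlen // cp_size tokens; the last rank
    # absorbs the remainder so that the sizes sum to seqlen.
    base = seqlen // cp_size
    return [base] * (cp_size - 1) + [seqlen - base * (cp_size - 1)]

assert cp_chunk_sizes(10, 3) == [3, 3, 4]
assert sum(cp_chunk_sizes(10, 3)) == 10
</pre></div></div>
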
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.create_allreduce_plugin">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">create_allreduce_plugin</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">network</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">INetworkDefinition</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tensor</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">ITensor</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">workspace</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">ITensor</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">group</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">array</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataType</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">all_reduce_params</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.AllReduceParams" title="tensorrt_llm.functional.AllReduceParams"><span class="pre">AllReduceParams</span></a></span></em>,</dd>
</dl>

<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#create_allreduce_plugin"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.create_allreduce_plugin" title="Link to this definition">#</a></dt>
<dd></dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.cuda_stream_sync">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">cuda_stream_sync</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input_list</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">side_stream_id</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.SideStreamIDType" title="tensorrt_llm.functional.SideStreamIDType"><span class="pre">SideStreamIDType</span></a></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#cuda_stream_sync"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.cuda_stream_sync" title="Link to this definition">#</a></dt>
<dd><p>Wait for the side stream on the main stream.
output = input_list[0]</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input_list</strong> – List[Tensor] (On GPU)
The list of input tensors.</p></li>
<li><p><strong>side_stream_id</strong> – int (On CPU)
The side stream ID.</p></li>
</ul>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.cumsum">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">cumsum</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">prefer_plugin</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#cumsum"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.cumsum" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to calculate the inclusive cumulative sum of the elements of
a tensor in a given dimension.</p>
<p>Given an input tensor, that function creates an operation that calculates the
inclusive cumulative sum of the elements in the dimension ‘dim’ to create
a new tensor. The output tensor has the same shape as the input tensor.</p>
<p>The input tensor must have rank >= 1. The ‘dim’ must be valid; a negative
value is supported.</p>
<p>For example, on input=[[4, 2, 5], [2, 1, 2], [4, 7, 1]], which has a shape
[3, 3],</p>
<blockquote>
<div><p>cumsum(input, 0)</p>
</div></blockquote>
<p>will produce [[4, 2, 5], [6, 3, 7], [10, 10, 8]].</p>
<blockquote>
<div><p>cumsum(input, 1)</p>
</div></blockquote>
<p>will produce [[4, 6, 11], [2, 3, 5], [4, 11, 12]].</p>
<p>That operation is implemented by the TensorRT ILoopLayer.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor to calculate the inclusive cumulative sum.</p></li>
<li><p><strong>dim</strong> – int
The dimension to calculate the inclusive cumulative sum. A negative
value is supported.</p></li>
<li><p><strong>prefer_plugin</strong> – bool
Whether to use the cumsumLastDim plugin if dim is the last dim.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor containing the inclusive cumulative sum of input.</p>
</dd>
</dl>
</dd></dl>

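<p>The docstring example can be checked with a NumPy reference (illustrative only; this is not the TensorRT execution path):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np

x = np.array([[4, 2, 5], [2, 1, 2], [4, 7, 1]])
print(np.cumsum(x, axis=0))  # [[ 4  2  5] [ 6  3  7] [10 10  8]]
print(np.cumsum(x, axis=1))  # [[ 4  6 11] [ 2  3  5] [ 4 11 12]]
</pre></div></div>
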
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.div">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">div</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">left:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">right:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ElementWiseOperation</span> <span class="pre">=</span> <span class="pre"><ElementWiseOperation.DIV:</span> <span class="pre">5></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.div" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation with two inputs.</p>
<p>For each input, that function first creates a constant tensor if the input
is an integer or a float. Then, if needed, it expands the smaller tensor to
make sure its rank is the same as the larger one. Then, it performs the
elementwise operation ‘op’.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>add for op=trt.ElementWiseOperation.SUM
sub for op=trt.ElementWiseOperation.SUB
mul for op=trt.ElementWiseOperation.PROD
div for op=trt.ElementWiseOperation.DIV
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
gt for op=trt.ElementWiseOperation.GREATER
lt for op=trt.ElementWiseOperation.LESS
op_and for op=trt.ElementWiseOperation.AND
op_or for op=trt.ElementWiseOperation.OR
eq for op=trt.ElementWiseOperation.EQUAL
minimum for op=trt.ElementWiseOperation.MIN
maximum for op=trt.ElementWiseOperation.MAX
pow for op=trt.ElementWiseOperation.POW</p>
</div></blockquote>
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>left</strong> – Union[Tensor, int, float]
The first input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>right</strong> – Union[Tensor, int, float]
The second input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>op</strong> – trt.ElementWiseOperation
The binary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.dora_plugin">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">dora_plugin</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">activations</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">out_hidden_sizes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">lora_weights_pointers</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">host_request_types</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">host_context_lengths</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#dora_plugin"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.dora_plugin" title="Link to this definition">#</a></dt>
<dd><p>The DoRA plugin applies column-wise scaling to the output of a LoRA layer.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>activations</strong> – Tensor (On GPU)
The input activations. Their shape is [batch_size, seq_len, dim], or [num_tokens, dim] when remove_input_padding is enabled.</p></li>
<li><p><strong>out_hidden_sizes</strong> – list[int]
The output hidden size of each adapter in the related LoRA module.
For example, for a qkv projection out_hidden_sizes should be [q_dim, k_dim, v_dim].</p></li>
<li><p><strong>host_request_types</strong> – Tensor = None (On CPU)
The tensor on the host that indicates if a request is in context or
generation phase. Its shape is [batch_size]. See Inflight Batching
in docs/source/advanced/gpt-attention.md.</p></li>
<li><p><strong>host_context_lengths</strong> – Tensor = None (On CPU)
A host tensor that contains the lengths of the different inputs.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by that layer.</p>
</dd>
</dl>
</dd></dl>

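<p>Conceptually, column-wise scaling multiplies each output column of the concatenated adapter outputs by a per-column magnitude. The NumPy sketch below illustrates only that math; all names and sizes are hypothetical, and the real plugin reads per-request magnitudes through lora_weights_pointers:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np

# Hypothetical qkv projection: the output columns are the concatenation
# of the q, k, and v adapter outputs.
q_dim, k_dim, v_dim = 8, 4, 4
tokens = np.random.rand(5, q_dim + k_dim + v_dim)  # [num_tokens, sum(out_hidden_sizes)]
scales = np.random.rand(q_dim + k_dim + v_dim)     # one magnitude per output column
scaled = tokens * scales                           # broadcast over the token dimension
assert scaled.shape == tokens.shape
</pre></div></div>
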
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.einsum">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">einsum</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">einsum_eq</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">inputs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">]</span></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#einsum"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.einsum" title="Link to this definition">#</a></dt>
<dd><p>Add an Einsum operation.</p>
<p>That operation maps to tensorrt.IEinsumLayer. As explained in the TensorRT
documentation, this layer implements a summation over the elements of the
inputs along dimensions specified by the equation parameter, based on the
Einstein summation convention. The layer can have one or more inputs of
rank >= 0. All the inputs must be of the same data type. This layer supports
all TensorRT data types except bool. There is one output tensor of the same
type as the input tensors. The shape of the output tensor is determined by the
equation.</p>
<p>The equation specifies ASCII lower-case letters for each dimension in the
inputs in the same order as the dimensions, separated by a comma for each
input. The dimensions labeled with the same subscript must match or be
broadcastable. Repeated subscript labels in one input take the diagonal.
Repeating a label across multiple inputs means that those axes will be
multiplied. Omitting a label from the output means values along those axes
will be summed. In implicit mode, the indices which appear once in the
expression will be part of the output in increasing alphabetical order. In
explicit mode, the output can be controlled by specifying output subscript
labels by adding an arrow (‘->’) followed by subscripts for the output. For
example, “ij,jk->ik” is equivalent to “ij,jk”. Ellipsis (‘…’) can be used
in place of subscripts to broadcast the dimensions. See the TensorRT
Developer Guide for more details on equation syntax.</p>
<p>Many common operations can be expressed using the Einsum equation.</p>
<p class="rubric">Example</p>
<p>Matrix Transpose: ij->ji
Sum: ij->
Matrix-Matrix Multiplication: ik,kj->ij
Dot Product: i,i->
Matrix-Vector Multiplication: ik,k->i
Batch Matrix Multiplication: ijk,ikl->ijl
Batch Diagonal: …ii->…i</p>
<p>Note that TensorRT does not support ellipsis or diagonal operations, so
neither does TensorRT-LLM.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>einsum_eq</strong> – str
The Einsum equation.</p></li>
<li><p><strong>inputs</strong> – Sequence[Tensor]
The sequence of inputs consumed by the Einsum operation.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the Einsum operation.</p>
</dd>
</dl>
</dd></dl>

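<p>The supported example equations above can be checked with NumPy's einsum, which follows the same convention (illustrative only):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np

a = np.arange(6.0).reshape(2, 3)
b = np.arange(12.0).reshape(3, 4)
v = np.arange(3.0)

np.einsum('ij->ji', a)        # matrix transpose
np.einsum('ij->', a)          # sum of all elements
np.einsum('ik,kj->ij', a, b)  # matrix-matrix multiplication
np.einsum('i,i->', v, v)      # dot product
np.einsum('ik,k->i', a, v)    # matrix-vector multiplication
# Ellipsis and diagonal equations are omitted: TensorRT (and therefore
# TensorRT-LLM) does not support them.
</pre></div></div>
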
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.elementwise_binary">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">elementwise_binary</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">left</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">ElementWiseOperation</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#elementwise_binary"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.elementwise_binary" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation with two inputs.</p>
<p>For each input, that function first creates a constant tensor if the input
is an integer or a float. Then, if needed, it expands the smaller tensor to
make sure its rank is the same as the larger one. Then, it performs the
elementwise operation ‘op’.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>add for op=trt.ElementWiseOperation.SUM
sub for op=trt.ElementWiseOperation.SUB
mul for op=trt.ElementWiseOperation.PROD
div for op=trt.ElementWiseOperation.DIV
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
gt for op=trt.ElementWiseOperation.GREATER
lt for op=trt.ElementWiseOperation.LESS
op_and for op=trt.ElementWiseOperation.AND
op_or for op=trt.ElementWiseOperation.OR
eq for op=trt.ElementWiseOperation.EQUAL
minimum for op=trt.ElementWiseOperation.MIN
maximum for op=trt.ElementWiseOperation.MAX
pow for op=trt.ElementWiseOperation.POW</p>
</div></blockquote>
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>left</strong> – Union[Tensor, int, float]
The first input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>right</strong> – Union[Tensor, int, float]
The second input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>op</strong> – trt.ElementWiseOperation
The binary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
</dd></dl>

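<p>A NumPy analogue of the scalar-promotion and rank-expansion behavior described above (illustrative only; in TensorRT-LLM the scalar becomes a constant tensor in the graph):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np

left = np.array([[1.0, 2.0], [3.0, 4.0]])  # rank 2
right = 2.0                                # scalar promoted to a constant
print(left / right)           # the div closure
print(np.maximum(left, 2.5))  # the maximum closure
</pre></div></div>
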
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.embedding">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">embedding</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tp_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tp_group</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">sharding_dim</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tp_rank</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">per_token_scale</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">padding</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#embedding"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.embedding" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to perform embedding lookup.</p>
<p>That operation performs the embedding lookup. The ‘input’ tensor contains
the identifiers of the rows of ‘weight’ to gather.</p>
<p>1. Distribute the embedding lookup table over multiple GPUs
When ‘tp_size’ is greater than 1 and the ‘tp_group’ is defined, this
embedding lookup is distributed among multiple GPUs.</p>
<p>When ‘sharding_dim==0’, each GPU stores a subset of the rows of the embedding
table (the number of rows per GPU is given by weight.shape[0] and the offset to
the 1st row stored on the GPU is given by rank * weight.shape[0]). Each
parallel rank will query all the indices and set 0s for the weights that
are not stored on the associated GPU. To compute the final result, a
parallel all-reduce operation is added to the TensorRT graph. That lookup
can be performed using either the plugin or the operators TensorRT supports.</p>
<p>When ‘sharding_dim==1’, each GPU stores a subset of the embedding table’s columns.
Each rank can obtain a portion of the embedding results.
Then the embedding is collected using the all-gather operation.
Related transposition operations are also used to obtain the final results.</p>
<p>2. Store the embedding lookup table as a whole
When ‘tp_size’ is not greater than 1, the embedding lookup table will not
be divided. In this case, when the default_net().plugin_config.lookup_plugin is set,
the operation is implemented using a plugin (without the all-reduce operation).
Otherwise, this operation is implemented using the standard IGatherLayer in TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor that contains the indices to perform the lookup.</p></li>
<li><p><strong>weight</strong> – Tensor
The table to gather from.</p></li>
<li><p><strong>tp_size</strong> – int
The number of GPUs collaborating to perform that embedding.</p></li>
<li><p><strong>tp_group</strong> – Optional[List[int]]
The group of world ranks participating in the all-reduce when
tp_size > 1.</p></li>
<li><p><strong>sharding_dim</strong> – int
sharding_dim = 0 means that we shard the embedding table in the vocab dim;
sharding_dim = 1 means that we shard the embedding table in the embedding dim.</p></li>
<li><p><strong>tp_rank</strong> – int
The tensor parallelism rank. Used to calculate the offset in TP on the vocab dim.</p></li>
<li><p><strong>padding</strong> – Tensor
Additional padding added to the end of the embedding table before feeding into the gather op.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the embedding lookup layer.</p>
</dd>
</dl>
</dd></dl>

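<p>A NumPy sketch of the ‘sharding_dim==0’ scheme described above: each rank zeroes the rows it does not own, and summing the partial results (the all-reduce) reconstructs the full lookup. All names and sizes below are hypothetical:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np

vocab, hidden, tp_size = 8, 4, 2
rows_per_rank = vocab // tp_size
table = np.arange(vocab * hidden, dtype=np.float32).reshape(vocab, hidden)
ids = np.array([1, 6, 3])

partials = []
for rank in range(tp_size):
    shard = table[rank * rows_per_rank:(rank + 1) * rows_per_rank]
    local = ids - rank * rows_per_rank          # indices relative to this shard
    in_shard = (local >= 0) & (rows_per_rank > local)
    out = np.zeros((len(ids), hidden), dtype=np.float32)
    out[in_shard] = shard[local[in_shard]]      # rows not owned stay zero
    partials.append(out)

# The all-reduce (sum over ranks) reconstructs the full lookup.
assert np.array_equal(sum(partials), table[ids])
</pre></div></div>
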
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.eq">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">eq</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">left:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">right:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ElementWiseOperation</span> <span class="pre">=</span> <span class="pre"><ElementWiseOperation.EQUAL:</span> <span class="pre">11></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.eq" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation with two inputs.</p>
<p>For each input, that function first creates a constant tensor if the input
is an integer or a float. Then, if needed, it expands the smaller tensor to
make sure its rank is the same as the larger one. Then, it performs the
elementwise operation ‘op’.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>add for op=trt.ElementWiseOperation.SUM
sub for op=trt.ElementWiseOperation.SUB
mul for op=trt.ElementWiseOperation.PROD
div for op=trt.ElementWiseOperation.DIV
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
gt for op=trt.ElementWiseOperation.GREATER
lt for op=trt.ElementWiseOperation.LESS
op_and for op=trt.ElementWiseOperation.AND
op_or for op=trt.ElementWiseOperation.OR
eq for op=trt.ElementWiseOperation.EQUAL
minimum for op=trt.ElementWiseOperation.MIN
maximum for op=trt.ElementWiseOperation.MAX
pow for op=trt.ElementWiseOperation.POW</p>
</div></blockquote>
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>left</strong> – Union[Tensor, int, float]
The first input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>right</strong> – Union[Tensor, int, float]
The second input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>op</strong> – trt.ElementWiseOperation
The binary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.exp">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">exp</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.UnaryOperation</span> <span class="pre">=</span> <span class="pre"><UnaryOperation.EXP:</span> <span class="pre">0></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.exp" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation on a single input.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>round for op=trt.UnaryOperation.ROUND
sqrt for op=trt.UnaryOperation.SQRT
exp for op=trt.UnaryOperation.EXP
sin for op=trt.UnaryOperation.SIN
cos for op=trt.UnaryOperation.COS
abs for op=trt.UnaryOperation.ABS
log for op=trt.UnaryOperation.LOG</p>
</div></blockquote>
<p>It is implemented using the IUnaryLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>op</strong> – trt.UnaryOperation
The unary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.expand">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">expand</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">expand_shape</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#expand"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.expand" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to expand a tensor.</p>
<p>The operation expands the input tensor in the singleton dimensions to the
size indicated by the corresponding dimension in the <cite>expand_shape</cite> tensor.
In other words, given an input tensor with dimensions of size 1, those
dimensions will be expanded to the size in <cite>expand_shape</cite>.</p>
<p>For example, a tensor of shape [4, 3, 1, 3] will be expanded to a tensor of
shape [4, 3, 2, 3] by the layer created using expand(input, [4, 3, 2, 3]).</p>
<p>The expansion may either replicate the values or be mapped to a view with a
stride of 0 in the expanded dimensions. For example, for a tensor [[3, 2]] of
shape [1, 2],</p>
<blockquote>
<div><p>expand([[3, 2]], [2, 2])</p>
</div></blockquote>
<p>can be used to expand the input to [[3, 2], [3, 2]].</p>
<p>This operation is implemented using a tensorrt.ISliceLayer. The current
implementation does not verify that non-singleton dimensions are not
shrunk. In other words, for an input of shape [4, 1, 2],</p>
<blockquote>
<div><p>expand(input, [3, 2, 2])</p>
</div></blockquote>
<p>will produce a tensor of shape [3, 2, 2]. That behavior is subject to
change in the future.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>expand_shape</strong> – Tensor
The new shape of the expanded tensor.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the expand layer.</p>
</dd>
</dl>
</dd></dl>

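<p>A NumPy analogue of expanding singleton dimensions (illustrative only):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np

x = np.array([[3, 2]])             # shape [1, 2]
print(np.broadcast_to(x, (2, 2)))  # [[3 2] [3 2]]
</pre></div></div>
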
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.expand_dims">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">expand_dims</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">shape_cast_dtype</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#expand_dims"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.expand_dims" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to expand the tensor shape with singleton dimensions.</p>
<p>That function adds a tensorrt.IShuffleLayer to the network. Given an ‘input’
of rank N and a sequence of M dimensions, the output tensor produced by
this operation (when executed by TensorRT) will have a rank of N+M. Singleton
dimensions will be inserted at the different positions in ‘dim’.</p>
<p>The pseudo-code for that operation is:</p>
<blockquote>
<div><p>new_shape, ii = [], 0
for jj in range(input.rank() + len(dim)):</p>
<blockquote>
<div><p>keep = jj not in dim
new_shape.append(input.shape[ii] if keep else 1)
ii += 1 if keep else 0</p>
</div></blockquote>
</div></blockquote>
<p>For example, for a tensor of shape [3, 4, 1, 5]</p>
<blockquote>
<div><p>expand_dims(input, [0, 2])</p>
</div></blockquote>
<p>will produce a tensor of shape [1, 3, 1, 4, 1, 5].</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor to expand.</p></li>
<li><p><strong>dim</strong> – Union[int, Sequence[int]]
The positions in the output tensor where to insert singleton
dimensions.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the shuffle layer.</p>
</dd>
</dl>
</dd></dl>

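<p>The pseudo-code above can be checked against NumPy (illustrative only):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np

# Insert singletons at output positions 0 and 2, as in the docstring example.
x = np.zeros((3, 4, 1, 5))
y = np.expand_dims(x, axis=(0, 2))
assert y.shape == (1, 3, 1, 4, 1, 5)
</pre></div></div>
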
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.expand_dims_like">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">expand_dims_like</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">left</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#expand_dims_like"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.expand_dims_like" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to expand the first tensor to the same rank as the second
tensor.</p>
<p>That function takes a first tensor. It also accepts an integer or a float,
in which case it creates a constant tensor from it. In both cases, the rank
of that first tensor is compared to the rank of the second tensor. If they
are of the same rank, the first tensor is returned. Otherwise, the first
tensor is expanded on the left to match the rank of the second tensor.</p>
<p>Note that the shapes do not have to match, only the rank is considered in
that function.</p>
<p>For example, for a pair of tensors of shapes [3, 4] and [4, 3, 2], the
first tensor will be expanded to a tensor of rank 3 and shape [1, 3, 4].</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>left</strong> – Union[Tensor, int, float]
The first tensor to expand. When a scalar value is provided as a
parameter, that function first creates a tensor before expanding it
(if needed).</p></li>
<li><p><strong>right</strong> – Tensor
The reference tensor to match.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the shuffle layer.</p>
</dd>
</dl>
</dd></dl>

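<p>A NumPy sketch of the rank-matching behavior described above (illustrative only):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np

left = np.zeros((3, 4))      # rank 2
right = np.zeros((4, 3, 2))  # rank 3
# Pad the rank on the left with singleton dimensions; shapes need not match.
padded = left.reshape((1,) * (right.ndim - left.ndim) + left.shape)
assert padded.shape == (1, 3, 4)
</pre></div></div>
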
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.expand_mask">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">expand_mask</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">mask</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tgt_len</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#expand_mask"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.expand_mask" title="Link to this definition">#</a></dt>
<dd><p>Expand an attention mask.</p>
<p>That function adds the sequence of operations to expand from a tensor of
shape ‘[batch_size, src_seq_len]’ to a tensor of shape
‘[batch_size, 1, tgt_seq_len, src_seq_len]’. It can be used to create the
mask applied to the Q*K^T product before the softmax operation in the
multi-head attention block.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>mask</strong> – Tensor
The input mask.</p></li>
<li><p><strong>tgt_len</strong> – Optional[Tensor]
The size of the 3rd dimension in the output tensor. If None,
the 2nd dimension of the input is used.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor created by that sequence of operations.</p>
</dd>
</dl>
</dd></dl>

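<p>A NumPy sketch of the shape transformation described above (illustrative only; the sizes are hypothetical):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np

batch, src_len, tgt_len = 2, 5, 3
mask = np.ones((batch, src_len))
# [batch, src] becomes [batch, 1, tgt, src] by inserting axes and broadcasting.
expanded = np.broadcast_to(mask[:, None, None, :], (batch, 1, tgt_len, src_len))
assert expanded.shape == (batch, 1, tgt_len, src_len)
</pre></div></div>
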
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.flatten">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">flatten</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">start_dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">end_dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">-1</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#flatten"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.flatten" title="Link to this definition">#</a></dt>
<dd><p>Flattens the input by reshaping it into a one-dimensional tensor.</p>
<p>If start_dim or end_dim is passed, only dimensions starting with start_dim and
ending with end_dim are flattened. The order of elements in the input is unchanged.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor to flatten.</p></li>
<li><p><strong>start_dim</strong> – int
The first dim to flatten.</p></li>
<li><p><strong>end_dim</strong> – int
The last dim to flatten.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the flatten layer.</p>
</dd>
</dl>
</dd></dl>

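<p>A NumPy sketch of flattening dimensions start_dim through end_dim (illustrative only):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np

x = np.zeros((2, 3, 4, 5))
start_dim, end_dim = 1, 2
# Collapse dims start_dim..end_dim into one; the element order is unchanged.
new_shape = x.shape[:start_dim] + (-1,) + x.shape[end_dim + 1:]
assert x.reshape(new_shape).shape == (2, 12, 5)
</pre></div></div>
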
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.flip">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">flip</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dims</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#flip"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.flip" title="Link to this definition">#</a></dt>
|
||
<dd><p>Reverses the order of an n-D tensor along given axis in dims.</p>
|
||
<p>That flip operation maps to a TensorRT ISliceLayer. For the dimensions
|
||
listed in dims it copies the elements from the last one to the first one
|
||
(from (N-1) down to 0 with a step of -1). For the dimensions not in ‘dims’,
|
||
it copies the elements from the first one to the last one (from 0 to N-1
|
||
with a step of 1).</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor on which the cast is applied.</p></li>
|
||
<li><p><strong>dims</strong> – list or tuple
|
||
The axes to flip. Negative indices are supported.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by the inserted layer.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
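<p>An illustrative sketch (assuming the same network-definition scaffolding as in the flatten example above):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np
from tensorrt_llm.functional import constant, flip

# Inside a tensorrt_llm.net_guard(network) scope:
x = constant(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32))
y = flip(x, dims=[0])   # [[4, 5, 6], [1, 2, 3]]
z = flip(x, dims=[-1])  # [[3, 2, 1], [6, 5, 4]]
</pre></div>
</div>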

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.floordiv">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">floordiv</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">left:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">right:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ElementWiseOperation</span> <span class="pre">=</span> <span class="pre"><ElementWiseOperation.FLOOR_DIV:</span> <span class="pre">7></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.floordiv" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation with two inputs.</p>
<p>For each input, that function first creates a constant tensor if the input
is an integer or a float. Then, if needed, it expands the smaller tensor to
make sure its rank is the same as the larger one. Then, it performs the
elementwise operation ‘op’.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>add for op=trt.ElementWiseOperation.SUM
sub for op=trt.ElementWiseOperation.SUB
mul for op=trt.ElementWiseOperation.PROD
div for op=trt.ElementWiseOperation.DIV
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
gt for op=trt.ElementWiseOperation.GREATER
lt for op=trt.ElementWiseOperation.LESS
op_and for op=trt.ElementWiseOperation.AND
op_or for op=trt.ElementWiseOperation.OR
eq for op=trt.ElementWiseOperation.EQUAL
minimum for op=trt.ElementWiseOperation.MIN
maximum for op=trt.ElementWiseOperation.MAX
pow for op=trt.ElementWiseOperation.POW</p>
</div></blockquote>
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>left</strong> – Union[Tensor, int, float]
The first input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>right</strong> – Union[Tensor, int, float]
The second input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>op</strong> – trt.ElementWiseOperation
The binary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
</dd></dl>
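<p>For example (illustrative; scalar operands become constant tensors as described above):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np
from tensorrt_llm.functional import add, constant, floordiv

# Inside a tensorrt_llm.net_guard(network) scope:
x = constant(np.array([7, 8, 9], dtype=np.int32))
q = floordiv(x, 2)  # [3, 4, 4]; the scalar 2 is turned into a constant tensor
s = add(x, q)       # [10, 12, 13], using the 'add' closure of the same helper
</pre></div>
</div>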

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.gather">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">gather</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">indices</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#gather"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.gather" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to gather elements from a tensor.</p>
<p>That function implements the GatherElements operator from the ONNX
specification as described in</p>
<blockquote>
<div><p><a class="github reference external" href="https://github.com/onnx/onnx/blob/main/docs/Operators.md#GatherElements">onnx/onnx</a></p>
</div></blockquote>
<p>The input and indices arguments must have the same rank >= 1. The operation
will produce a tensor with the same shape as the indices tensor. The axis
is the dimension to gather on.</p>
<p>As shown in the ONNX description, for a 3D tensor, the output is:</p>
<blockquote>
<div><p>out[i][j][k] = input[indices[i][j][k]][j][k] if axis = 0,
out[i][j][k] = input[i][indices[i][j][k]][k] if axis = 1,
out[i][j][k] = input[i][j][indices[i][j][k]] if axis = 2.</p>
</div></blockquote>
<p>For example,</p>
<blockquote>
<div><p>gather([[4, 2], [5, 3]], 0, [[1, 0], [0, 1]])</p>
</div></blockquote>
<p>will produce [[5, 2], [4, 3]].</p>
<blockquote>
<div><p>gather([[1, 2, 3], [4, 5, 6]], 1, [[1], [0]])</p>
</div></blockquote>
<p>will produce [[2], [4]]. See the ONNX documentation for more examples.</p>
<p>That operation maps to the TensorRT IGatherLayer.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor to gather elements from.</p></li>
<li><p><strong>dim</strong> – int
The dimension to gather on.</p></li>
<li><p><strong>indices</strong> – Union[Tensor, int]
The positions in the ‘dim’ dimension to gather from.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor containing the gathered elements. It has the same shape as
the indices tensor.</p>
</dd>
</dl>
</dd></dl>
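<p>The first worked example above, written against the API (an illustrative sketch inside a network-definition scope):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np
from tensorrt_llm.functional import constant, gather

# Inside a tensorrt_llm.net_guard(network) scope:
data = constant(np.array([[4, 2], [5, 3]], dtype=np.int32))
idx = constant(np.array([[1, 0], [0, 1]], dtype=np.int32))
out = gather(data, dim=0, indices=idx)  # evaluates to [[5, 2], [4, 3]]
</pre></div>
</div>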

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.gather_last_token_logits">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">gather_last_token_logits</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">hidden_states</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">last_token_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">remove_input_padding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#gather_last_token_logits"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.gather_last_token_logits" title="Link to this definition">#</a></dt>
<dd><p>Extract the logits that correspond to the last token from the hidden states.</p>
<p>That function adds the operations to extract the logits of the last tokens
in a batch of sequences.</p>
<p>Depending on whether ‘remove_input_padding’ is ‘True’ or ‘False’, that
function assumes inputs of different shapes.</p>
<p>When ‘remove_input_padding’ is ‘True’, the ‘hidden_states’ tensor is
assumed to be packed. It has a shape ‘[num_tokens, hidden_dim]’ where
‘num_tokens’ is the sum of the lengths of the sequences in the batch and
‘hidden_dim’ is the hidden dimension. The ‘last_token_ids’ is a 1D tensor
that encodes the inclusive prefix-sums of the lengths of the sequences in
the batch.</p>
<p>When ‘remove_input_padding’ is ‘False’, the ‘hidden_states’ tensor is
assumed to be padded. It has a shape ‘[batch_size, max_seqlen, hidden_dim]’
where ‘max_seqlen’ is the length of the longest sequence in the batch and
‘hidden_dim’ is the hidden dimension. The ‘last_token_ids’ is a 1D tensor
that encodes the length of each sequence in the batch.</p>
<p>In both cases, that function produces a tensor of shape ‘[batch_size,
hidden_dim]’ where the row at index ‘i’ corresponds to the logits of the
last token from the ‘i’-th sequence.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>hidden_states</strong> – Tensor
The hidden states.</p></li>
<li><p><strong>last_token_ids</strong> – Tensor
The inclusive prefix-sum of the lengths or the lengths of the
sequences in the batch.</p></li>
<li><p><strong>remove_input_padding</strong> – bool
Indicates if the hidden_states are packed (‘True’) or padded
(‘False’).</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor created by that sequence of operations.</p>
</dd>
</dl>
</dd></dl>
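<p>A shape-level sketch of the two layouts (illustrative values; the hidden dimension of 4096 is an assumption):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import tensorrt as trt
from tensorrt_llm import Tensor
from tensorrt_llm.functional import gather_last_token_logits

# Inside a tensorrt_llm.net_guard(network) scope, packed layout:
# for sequence lengths [3, 5], last_token_ids holds the inclusive
# prefix-sums [3, 8].
hidden_states = Tensor(name='hidden_states', dtype=trt.float16,
                       shape=[-1, 4096])  # [num_tokens, hidden_dim]
last_token_ids = Tensor(name='last_token_ids', dtype=trt.int32, shape=[-1])
logits = gather_last_token_logits(hidden_states, last_token_ids,
                                  remove_input_padding=True)
# Result: [batch_size, hidden_dim], one row per sequence. With
# remove_input_padding=False, hidden_states would instead be
# [batch_size, max_seqlen, hidden_dim] and last_token_ids the lengths [3, 5].
</pre></div>
</div>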

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.gather_nd">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">gather_nd</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">indices</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">batch_dims</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#gather_nd"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.gather_nd" title="Link to this definition">#</a></dt>
<dd><p>Adds a layer that performs a gather with some element-wise dimensions.
See: <a class="reference external" href="https://onnx.ai/onnx/operators/onnx__GatherND.html">https://onnx.ai/onnx/operators/onnx__GatherND.html</a>
The gather is performed on dim=batch_dims.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The tensor on which the gather operation is performed.</p></li>
<li><p><strong>indices</strong> – Tensor
The tensor that indicates which entries are to be gathered.</p></li>
<li><p><strong>batch_dims</strong> – int
The number of leading dimensions that are skipped before the gather starts.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A tensor created by the gather layer with GatherMode.ND.</p>
</dd>
</dl>
</dd></dl>
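<p>An illustrative sketch (with batch_dims=0, the last axis of indices addresses full coordinates of the input):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np
from tensorrt_llm.functional import constant, gather_nd

# Inside a tensorrt_llm.net_guard(network) scope:
data = constant(np.array([[1, 2], [3, 4]], dtype=np.int32))
idx = constant(np.array([[0, 1], [1, 0]], dtype=np.int32))
out = gather_nd(data, idx, batch_dims=0)  # evaluates to [2, 3]
</pre></div>
</div>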

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.gegelu">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">gegelu</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">x</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">limit</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#gegelu"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.gegelu" title="Link to this definition">#</a></dt>
<dd></dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.geglu">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">geglu</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">x</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#geglu"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.geglu" title="Link to this definition">#</a></dt>
<dd><p>Add a Gated-GELU operation.</p>
<p>That function takes a tensor, splits it into two halves along the last
dimension, applies GELU to the second half and multiplies the results. The
behavior is undefined if the last dimension is not even.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>x</strong> – Tensor
The input tensor on which the activation function is applied.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the activation layer.</p>
</dd>
</dl>
</dd></dl>
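<p>A NumPy reference for the computation described above (an assumption-level sketch using the common tanh approximation of GELU):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np

def geglu_reference(x: np.ndarray) -> np.ndarray:
    # Split the last dimension in half, GELU-gate the second half.
    a, b = np.split(x, 2, axis=-1)
    gelu_b = 0.5 * b * (1.0 + np.tanh(np.sqrt(2.0 / np.pi)
                                      * (b + 0.044715 * b ** 3)))
    return a * gelu_b
</pre></div>
</div>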

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.gelu">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">gelu</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">x</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#gelu"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.gelu" title="Link to this definition">#</a></dt>
<dd><p>Add a GELU operation.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>x</strong> – Tensor
The input tensor on which the activation function is applied.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the activation layer.</p>
</dd>
</dl>
</dd></dl>
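<p>For instance (illustrative, inside a network-definition scope):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np
from tensorrt_llm.functional import constant, gelu

# Inside a tensorrt_llm.net_guard(network) scope:
x = constant(np.array([-1.0, 0.0, 1.0], dtype=np.float32))
y = gelu(x)  # elementwise GELU(x) = x * Phi(x), Phi being the Gaussian CDF
</pre></div>
</div>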

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.gemm_allreduce">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">gemm_allreduce</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">a</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">b</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">group</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">transa</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">transb</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">alpha</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">ndarray</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">output_dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataType</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">fp8_inputs_override</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">a_sf</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">b_sf</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#gemm_allreduce"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.gemm_allreduce" title="Link to this definition">#</a></dt>
<dd><p>Add an operation that performs fused GEMM+AllReduce.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>a</strong> – Tensor
Input tensor A.</p></li>
<li><p><strong>b</strong> – Tensor
Input tensor B.</p></li>
<li><p><strong>a_sf</strong> – Optional[Tensor]
Input tensor for scaling input A.</p></li>
<li><p><strong>b_sf</strong> – Optional[Tensor]
Input tensor for scaling input B.</p></li>
<li><p><strong>group</strong> – List[int]
Ranks participating in the collective.</p></li>
<li><p><strong>transa</strong> – bool
Whether or not input tensor A is transposed.</p></li>
<li><p><strong>transb</strong> – bool
Whether or not input tensor B is transposed.</p></li>
<li><p><strong>alpha</strong> – Optional[Union[ndarray, Tensor]]
The alpha for the GEMM epilogue, beta * C + (alpha * acc).</p></li>
<li><p><strong>output_dtype</strong> – trt.DataType
Output type for the plugin. If None, the type set in
plugin_config is used.</p></li>
<li><p><strong>fp8_inputs_override</strong> – bool
The TRT graph does not always detect FP8 inputs correctly. This
flag overrides the derived input tensor types so that the plugin
knows to issue FP8 MMAs.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The GEMM output tensor, reduced across ranks.</p>
</dd>
</dl>
</dd></dl>
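<p>An illustrative call shape (assumptions: two ranks, fp16 activations, and the required plugin configured; the tensor-parallel runtime setup is omitted):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import tensorrt as trt
from tensorrt_llm import Tensor
from tensorrt_llm.functional import gemm_allreduce

# Inside a tensorrt_llm.net_guard(network) scope, on each rank:
a = Tensor(name='a', dtype=trt.float16, shape=[128, 1024])  # activations
b = Tensor(name='b', dtype=trt.float16, shape=[1024, 512])  # weight shard
c = gemm_allreduce(a, b, group=[0, 1])  # local GEMM, summed over ranks 0 and 1
</pre></div>
</div>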

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.gemm_swiglu">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">gemm_swiglu</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">scale_d0</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1.0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">scale_d1</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1.0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">scale_output</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1.0</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#gemm_swiglu"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.gemm_swiglu" title="Link to this definition">#</a></dt>
<dd><p>Add a matrix multiplication, followed by a SwiGLU (<cite>x * SiLU(gate)</cite>) operation.</p>
<p>The SwiGLU operation takes the GEMM output, splits it into two halves
along the last dimension, applies SiLU to the second half and multiplies
the results. The behavior is undefined if the last dimension is not even.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The first tensor (often called A).</p></li>
<li><p><strong>weight</strong> – Tensor
The second tensor (often called B).</p></li>
<li><p><strong>bias</strong> – Optional[Tensor]
The per-channel bias. The plugin with fp8 dtype does not support bias yet.</p></li>
<li><p><strong>scale_d0</strong> – float
The scale for dequantizing x, used for fp8.</p></li>
<li><p><strong>scale_d1</strong> – float
The scale for dequantizing gate, used for fp8.</p></li>
<li><p><strong>scale_output</strong> – float
The scale for quantizing output, used for fp8.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the inserted layer.</p>
</dd>
</dl>
</dd></dl>
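<p>Ignoring the fp8 scale factors, the fused operation behaves like a matrix multiplication followed by the standalone SwiGLU activation; an assumption-level sketch:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import tensorrt as trt
from tensorrt_llm import Tensor
from tensorrt_llm.functional import gemm_swiglu, matmul, swiglu

# Inside a tensorrt_llm.net_guard(network) scope:
x = Tensor(name='x', dtype=trt.float16, shape=[128, 1024])
w = Tensor(name='w', dtype=trt.float16, shape=[1024, 512])
fused = gemm_swiglu(x, w)       # one fused plugin call; last dim halves to 256
unfused = swiglu(matmul(x, w))  # same semantics, expressed with two ops
</pre></div>
</div>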

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.generate_alibi_biases">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">generate_alibi_biases</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">slopes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">key_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#generate_alibi_biases"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.generate_alibi_biases" title="Link to this definition">#</a></dt>
<dd><p>Compute the ALiBi biases as described in <a class="reference external" href="https://arxiv.org/abs/2211.05100">https://arxiv.org/abs/2211.05100</a>.</p>
<p>The ALiBi biases are added to the result of the Q*K^T product in the
multi-head attention block.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>slopes</strong> – Tensor
The slopes.</p></li>
<li><p><strong>key_length</strong> – Tensor
The size of the K vector per head.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A constant tensor that contains the ALiBi biases.</p>
</dd>
</dl>
</dd></dl>
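<p>An illustrative sketch (the constant key length is an assumption; in a real network key_length usually comes from a shape computation):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>import numpy as np
from tensorrt_llm.functional import (constant, generate_alibi_biases,
                                     generate_alibi_slopes)

# Inside a tensorrt_llm.net_guard(network) scope:
slopes = constant(generate_alibi_slopes(num_heads=8))
key_length = constant(np.array(128, dtype=np.int32))
biases = generate_alibi_biases(slopes, key_length)  # added to Q*K^T per head
</pre></div>
</div>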

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.generate_alibi_slopes">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">generate_alibi_slopes</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">num_heads</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tp_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tp_rank</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">alibi_scale</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1.0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">alibi_bias_max</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">8</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">ndarray</span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#generate_alibi_slopes"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.generate_alibi_slopes" title="Link to this definition">#</a></dt>
<dd><p>Compute the ALiBi slopes as described in <a class="reference external" href="https://arxiv.org/abs/2211.05100">https://arxiv.org/abs/2211.05100</a>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>num_heads</strong> – int
The number of heads.</p></li>
<li><p><strong>tp_size</strong> – int
The tensor parallelism size.</p></li>
<li><p><strong>tp_rank</strong> – int
The tensor parallelism rank.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A constant np.ndarray that contains the ALiBi slopes.</p>
</dd>
</dl>
</dd></dl>
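<p>For example (illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from tensorrt_llm.functional import generate_alibi_slopes

# One slope per attention head; with tensor parallelism each rank only
# receives the slopes for its own shard of the heads.
slopes = generate_alibi_slopes(num_heads=8)
local = generate_alibi_slopes(num_heads=8, tp_size=2, tp_rank=1)
</pre></div>
</div>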

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.generate_logn_scaling">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">generate_logn_scaling</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">seq_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">8192</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">32768</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">ndarray</span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#generate_logn_scaling"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.generate_logn_scaling" title="Link to this definition">#</a></dt>
<dd><p>Compute the Log-N scaling vector for Qwen inference extrapolation.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>seq_length</strong> – int
The max sequence length seen in training (defaults to 8192 in Qwen-1).</p></li>
<li><p><strong>max_position_embeddings</strong> – int
The max position embeddings (defaults to 32768 in Qwen-1).</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A constant np.ndarray that contains the Log-N scaling vector.</p>
</dd>
</dl>
</dd></dl>
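<p>For example (illustrative; in the Qwen formulation, positions up to seq_length keep a scale of 1.0 and longer positions are scaled logarithmically):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from tensorrt_llm.functional import generate_logn_scaling

scaling = generate_logn_scaling(seq_length=8192,
                                max_position_embeddings=32768)
# Expected: a 1-D vector with one scaling factor per position.
</pre></div>
</div>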

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.gpt_attention">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">gpt_attention</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="pre">*,</span> <span class="pre">qkv:</span> <span class="pre">~tensorrt_llm.functional.Tensor,</span> <span class="pre">past_key_value:</span> <span class="pre">~tensorrt_llm.functional.Tensor,</span> <span class="pre">attention_mask:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">attention_packed_mask:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">sequence_length:</span> <span class="pre">~tensorrt_llm.functional.Tensor,</span> <span class="pre">host_past_key_value_lengths:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None,</span> <span class="pre">host_max_attention_window_sizes:</span> <span class="pre">~tensorrt_llm.functional.Tensor,</span> <span class="pre">host_sink_token_length:</span> <span class="pre">~tensorrt_llm.functional.Tensor,</span> <span class="pre">context_lengths:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None,</span> <span class="pre">cache_indirection:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None,</span> <span class="pre">host_request_types:</span> <span class="pre">~tensorrt_llm.functional.Tensor,</span> <span class="pre">layer_idx:</span> <span class="pre">int,</span> <span class="pre">num_heads:</span> <span class="pre">int,</span> <span class="pre">num_kv_heads:</span> <span class="pre">int,</span> <span class="pre">hidden_size_per_head:</span> <span class="pre">int,</span> <span class="pre">q_scaling:</span> <span class="pre">float,</span> <span class="pre">attn_logit_softcapping_scale:</span> <span class="pre">float</span> <span class="pre">=</span> <span class="pre">0.0,</span> <span class="pre">rotary_embedding_dim:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0,</span> <span class="pre">rotary_embedding_base:</span> <span class="pre">float</span> <span class="pre">=</span> <span class="pre">10000.0,</span> <span class="pre">rotary_embedding_scale_type:</span> <span class="pre">~tensorrt_llm.functional.RotaryScalingType</span> <span class="pre">=</span> <span class="pre">RotaryScalingType.none,</span> <span class="pre">rotary_embedding_short_m_scale:</span> <span class="pre">float</span> <span class="pre">=</span> <span class="pre">1.0,</span> <span class="pre">rotary_embedding_long_m_scale:</span> <span class="pre">float</span> <span class="pre">=</span> <span class="pre">1.0,</span> <span class="pre">rotary_embedding_scale:</span> <span class="pre">float</span> <span class="pre">=</span> <span class="pre">1.0,</span> <span class="pre">rotary_embedding_max_positions:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">1024,</span> <span class="pre">rotary_embedding_original_max_positions:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">1024,</span> <span class="pre">position_embedding_type:</span> <span 
class="pre">~tensorrt_llm.functional.PositionEmbeddingType</span> <span class="pre">=</span> <span class="pre">PositionEmbeddingType.learned_absolute,</span> <span class="pre">rotary_inv_freq:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">rotary_cos_sin:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">kv_orig_quant_scale:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">kv_quant_orig_scale:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">attention_output_orig_quant_scale:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">attention_output_sf_scale:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">kv_cache_quant_mode:</span> <span class="pre">~tensorrt_llm._utils.QuantModeWrapper</span> <span class="pre">|</span> <span class="pre">~tensorrt_llm.quantization.mode.QuantMode</span> <span class="pre">=</span> <span class="pre">QuantMode.None,</span> <span class="pre">max_context_length:</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">mask_type:</span> <span class="pre">~tensorrt_llm.functional.AttentionMaskType</span> <span class="pre">=</span> <span class="pre">AttentionMaskType.causal,</span> <span class="pre">block_sparse_block_size:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">64,</span> <span class="pre">block_sparse_homo_head_pattern:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False,</span> <span class="pre">block_sparse_num_local_blocks:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">16,</span> <span class="pre">block_sparse_vertical_stride:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">8,</span> <span class="pre">alibi_slopes:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">tp_size:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">1,</span> <span class="pre">tp_rank:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0,</span> <span class="pre">vision_start:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">-1,</span> <span class="pre">vision_length:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">-1,</span> <span class="pre">kv_cache_block_offsets:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span 
class="pre">=</span> <span class="pre">None,</span> <span class="pre">host_kv_cache_block_offsets:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">host_kv_cache_pool_pointers:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">host_kv_cache_pool_mapping:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">do_cross_attention:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False,</span> <span class="pre">cross_kv:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">cross_kv_length:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">encoder_input_lengths:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">relative_attention_bias:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">logn_scaling:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">max_distance:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0,</span> <span class="pre">host_context_lengths:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">qkv_bias:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">use_cache:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">True,</span> <span class="pre">spec_decoding_is_generation_length_variable:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False,</span> <span class="pre">spec_decoding_max_generation_length:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0,</span> <span class="pre">spec_decoding_generation_lengths:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">spec_decoding_position_offsets:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">spec_decoding_packed_mask:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">spec_decoding_use:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">long_rope_rotary_inv_freq:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> 
<span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">long_rope_rotary_cos_sin:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">mrope_rotary_cos_sin:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">mrope_position_deltas:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">host_runtime_perf_knobs:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">host_context_progress:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">is_mla_enabled_flag:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False,</span> <span class="pre">q_lora_rank:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0,</span> <span class="pre">kv_lora_rank:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0,</span> <span class="pre">qk_nope_head_dim:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0,</span> <span class="pre">qk_rope_head_dim:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0,</span> <span class="pre">v_head_dim:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0,</span> <span class="pre">q_b_proj:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">kv_b_proj:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">k_b_proj_trans:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">skip_attn=None,</span> <span class="pre">cp_group:</span> <span class="pre">~typing.List[int]</span> <span class="pre">=</span> <span class="pre">[0],</span> <span class="pre">cp_size:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">1,</span> <span class="pre">cp_rank:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0,</span> <span class="pre">num_kv_heads_origin:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">-1</span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span 
class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span><span class="p"><span class="pre">]</span></span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#gpt_attention"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.gpt_attention" title="Link to this definition">#</a></dt>
<dd><p>Add an operation that performs the multi-head attention in GPT-like models.</p>
<p>The signature of the function will change in a future release; we are in
the process of simplifying the API. The current version is still
work-in-progress! The following API is provided with hints regarding the
arguments that are likely to be removed or merged with others in the future
release.</p>
<p>See docs/source/advanced/gpt-attention.md for the documentation of that function.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>qkv</strong> – Tensor (On GPU)
The input QKV tensor. Its shape is [batch_beam_size, max_seqlen, qkv_dim] in padded mode and [num_tokens, qkv_dim] in
packed mode, where qkv_dim depends on using MQA, GQA, or MHA. See QKV Input in docs/source/advanced/gpt-attention.md,</p></li>
<li><p><strong>past_key_value</strong> – Tensor (On GPU)
The tensor that stores KV cache data. Its shape is
[max_batch_size * max_beam_width, 2, num_kv_heads, max_seqlen, hidden_dim_per_head]
in contiguous mode and
[max_blocks, 2, num_kv_heads, num_tokens_per_block, hidden_dim_per_head]
in paged mode. See KV Cache in docs/source/advanced/gpt-attention.md,</p></li>
<li><p><strong>attention_mask</strong> – Tensor (On GPU)
The tensor that stores the attention mask for unfused MHA or MMHA.
Its shape is [num_tokens, max_kv_seqlen].</p></li>
<li><p><strong>attention_packed_mask</strong> – Tensor (On GPU)
The tensor that stores the packed custom mask for fmha.
Its shape is [num_tokens, max_kv_seqlen / 32], where each bit represents one mask position.</p></li>
<li><p><strong>sequence_length</strong> – Tensor (On GPU)
The tensor that stores the length of each sequence. Its shape is
[batch_size]. See QKV Input in docs/source/advanced/gpt-attention.md,</p></li>
|
||
<li><p><strong>host_past_key_value_lengths</strong> – Tensor (On CPU)
|
||
An INT32 tensor of shape [batch_size],</p></li>
|
||
<li><p><strong>host_max_attention_window_sizes</strong> – Tensor (On CPU)
|
||
An INT32 tensor of shape [1].
|
||
by default, the max_attention_window_size is determined by the shape of cache_indir_table.
|
||
And we support independent max_attention_window_size for each layer.
|
||
This controls the sliding-window-attention/cyclic-kv-cache features.</p></li>
<li><p><strong>context_lengths</strong> – Tensor (On GPU)
The tensor that stores the context-phase sequence length of each request. Its shape
is [batch_size]. See QKV Input in docs/source/advanced/gpt-attention.md,</p></li>
<li><p><strong>cache_indirection</strong> – Tensor (On GPU)
The tensor to reconstruct the paths when using beam-search. Its
shape is [batch_size, beam_width, max_seqlen]. See Beam-Search in
docs/source/advanced/gpt-attention.md,</p></li>
<li><p><strong>host_request_types</strong> – Tensor = None (On CPU)
The tensor on the host that indicates if a request is in context or
generation phase. Its shape is [batch_size]. See Inflight Batching
in docs/source/advanced/gpt-attention.md,</p></li>
<li><p><strong>layer_idx</strong> – int
The index of this attention layer, used to access kv_cache_block_offsets,</p></li>
<li><p><strong>num_heads</strong> – int
The number of heads,</p></li>
<li><p><strong>num_kv_heads</strong> – int
The number of KV heads, generic to handle MHA/MQA/GQA,</p></li>
<li><p><strong>hidden_size_per_head</strong> – int
The hidden size per head,</p></li>
<li><p><strong>q_scaling</strong> – float
The value used to compute the scaling factor applied to the output
of the Q*K^T product. See Scaling Factors in docs/source/advanced/gpt-attention.md,</p></li>
<li><p><strong>attn_logit_softcapping_scale</strong> – float
The scale used in scale * tanh(value / scale) to softcap the output
of the Q*K^T product.</p></li>
<li><p><strong>rotary_embedding_dim</strong> – int
The dimension to compute RoPE. Use 0 when position_embedding_type is not RoPE.</p></li>
<li><p><strong>rotary_embedding_base</strong> – float
The theta value to use for RoPE. Ignored when position_embedding_type is not RoPE.</p></li>
<li><p><strong>rotary_embedding_scale_type</strong> – <p>RotaryScalingType
The scaling type of RoPE. Ignored when position_embedding_type is not RoPE.
Possible rotary scaling types:</p>
<blockquote>
<div><ul>
<li><p>RotaryScalingType.none</p></li>
<li><p>RotaryScalingType.linear</p></li>
<li><p>RotaryScalingType.dynamic</p></li>
<li><p>RotaryScalingType.longrope</p></li>
<li><p>RotaryScalingType.llama3</p></li>
</ul>
</div></blockquote>
</p></li>
<li><p><strong>rotary_embedding_scale</strong> – float
The scale value to use for linear/dynamic scaling in RoPE.
Ignored when position_embedding_type is not RoPE.
Must be set to 1 (default) if rotary_embedding_scale_type is <cite>none</cite>.</p></li>
<li><p><strong>rotary_inv_freq</strong> – float Tensor
The rotary inverse frequencies, with shape [head_size / 2].</p></li>
<li><p><strong>rotary_cos_sin</strong> – float2(cos/sin) Tensor
The rotary cos/sin cache, which will be reused among different requests.
It is taken as a constant tensor.</p></li>
<li><p><strong>rotary_embedding_max_positions</strong> – int
Needed only for <cite>dynamic</cite> RoPE scaling. Ignored otherwise.</p></li>
<li><p><strong>position_embedding_type</strong> – <p>PositionEmbeddingType
The position embedding type:</p>
<blockquote>
<div><ul>
<li><p>PositionEmbeddingType.learned_absolute</p></li>
<li><p>PositionEmbeddingType.relative</p></li>
<li><p>PositionEmbeddingType.rope_gptj</p></li>
<li><p>PositionEmbeddingType.rope_gpt_neox</p></li>
<li><p>PositionEmbeddingType.alibi</p></li>
<li><p>PositionEmbeddingType.alibi_with_scale</p></li>
</ul>
</div></blockquote>
</p></li>
<li><p><strong>kv_orig_quant_scale</strong> – Tensor
The tensor to store the scaling factor for quantization to INT8/FP8
in the KV cache. Its shape is [1]. See INT8/FP8 KV Cache in
docs/source/advanced/gpt-attention.md,</p></li>
<li><p><strong>kv_quant_orig_scale</strong> – Tensor
The tensor to store the scaling factor for dequantization from
INT8/FP8 in the KV cache. Its shape is [1]. See INT8/FP8 KV Cache
in docs/source/advanced/gpt-attention.md,</p></li>
<li><p><strong>attention_output_orig_quant_scale</strong> – Tensor
The tensor to store the scaling factor for quantization of the
attention output to FP8. Its shape is [1].</p></li>
<li><p><strong>kv_cache_quant_mode</strong> – QuantMode (int flags)
Do we enable the INT8 or FP8 KV cache?</p></li>
<li><p><strong>max_context_length</strong> – int32_t
The length of the longest input sequence. See QKV Input in
docs/source/advanced/gpt-attention.md,</p></li>
<li><p><strong>mask_type</strong> – <p>int = 1
The type of mask:</p>
<blockquote>
<div><ul>
<li><p>tensorrt_llm.layers.AttentionMaskType.padding for BERT,</p></li>
<li><p>tensorrt_llm.layers.AttentionMaskType.causal for GPT,</p></li>
<li><p>tensorrt_llm.layers.AttentionMaskType.sliding_window_causal for GPT,</p></li>
<li><p>tensorrt_llm.layers.AttentionMaskType.bidirectional for ChatGLM-6B,</p></li>
<li><p>tensorrt_llm.layers.AttentionMaskType.bidirectionalglm for GLM-10B,</p></li>
<li><p>tensorrt_llm.layers.AttentionMaskType.blocksparse for Phi-3-small,</p></li>
<li><p>tensorrt_llm.layers.AttentionMaskType.custom_mask for any model.</p></li>
</ul>
</div></blockquote>
</p></li>
<li><p><strong>block_sparse_block_size</strong> – int
Block size in block sparse attention,</p></li>
<li><p><strong>block_sparse_homo_head_pattern</strong> – bool
Do all attention heads share the same vertical stride pattern?</p></li>
<li><p><strong>block_sparse_num_local_blocks</strong> – int
Number of active blocks near the diagonal,</p></li>
<li><p><strong>block_sparse_vertical_stride</strong> – int
Stride of active blocks in the vertical dimension,</p></li>
<li><p><strong>alibi_slopes</strong> – Tensor
The ALiBi slopes. The ALiBi bias is computed on-the-fly in the kernel
when possible,</p></li>
<li><p><strong>tp_size</strong> – int
The number of processes/GPUs when tensor parallelism is activated,</p></li>
<li><p><strong>tp_rank</strong> – int
The rank of that process (when running tensor parallelism),</p></li>
<li><p><strong>kv_cache_block_offsets</strong> – The tensor of block offsets for the KV cache. Its shape is
[num_layers, max_batch_size, max_beam_width, 2, max_blocks_per_sequence * 2].
See the KV cache section in docs/source/advanced/gpt-attention.md, on gpu,</p></li>
<li><p><strong>host_kv_cache_block_offsets</strong> – The same as kv_cache_block_offsets, but on cpu,</p></li>
<li><p><strong>host_kv_cache_pool_pointers</strong> – The tensor of pool pointers for the KV cache. Its shape is [num_layers, 2].
See the KV cache section in docs/source/advanced/gpt-attention.md, on cpu,</p></li>
<li><p><strong>host_kv_cache_pool_mapping</strong> – The tensor of pool mapping for the different memory pools. Its shape is [num_layers, 2] - for each layer, the index of the pool and the index of the layer within the pool,</p></li>
<li><p><strong>do_cross_attention</strong> – bool = False
Do we use this as cross attention instead of self attention?</p></li>
<li><p><strong>cross_kv</strong> – Tensor = None
The KV tensor of encoder output hidden states. Its shape is [batch_size, max_seqlen, 2 * kvHeadNum * headSize] in padded mode and [1, num_tokens, 2 * kvHeadNum * headSize] in
packed mode,</p></li>
<li><p><strong>cross_kv_length</strong> – Tensor = None
The length of the longest encoder output sequence,</p></li>
<li><p><strong>encoder_input_lengths</strong> – Tensor
The tensor that stores the length of each encoder input sequence. Its shape is [batch_size],</p></li>
<li><p><strong>logn_scaling</strong> – Tensor = None
The logn scaling tensor [max_position_embedding_len], which is applied to q in order to help extrapolation,</p></li>
<li><p><strong>relative_attention_bias</strong> – Tensor = None
The relative attention bias [num_heads, max_seq_len, max_seq_len], or the relative attention embedding table for implicit mode, [num_heads, num_buckets].</p></li>
<li><p><strong>max_distance</strong> – int = 0
The maximum distance of relative position in attention, for implicit mode.
The default value is 0, meaning the regular mode of relative attention bias is used.
Implicit mode is only enabled when passing in a non-zero positive max_distance value.
See relative attention bias in docs/source/advanced/gpt-attention.md,</p></li>
<li><p><strong>host_context_lengths</strong> – Tensor = None (On CPU)
A host tensor that contains the lengths of the different inputs,</p></li>
<li><p><strong>qkv_bias</strong> – Tensor = None,
The qkv bias tensor.</p></li>
<li><p><strong>use_cache</strong> – bool = False
Do we need to store the KV cache? Not needed if there is no generation phase.</p></li>
<li><p><strong>spec_decoding_is_generation_length_variable</strong> – bool = False,
Whether the generation lengths can be different for each sequence in a batch.
For Medusa, this should be set to False.
For ReDrafter, this should be set to True.</p></li>
<li><p><strong>spec_decoding_max_generation_length</strong> – int = 1,
The maximum number of tokens possible in the generation phase per sequence.</p></li>
<li><p><strong>spec_decoding_generation_lengths</strong> – Tensor = None,
The generation phase tokens’ lengths for each sequence.
Shape: [batch_size]</p></li>
<li><p><strong>spec_decoding_position_offsets</strong> – Tensor = None,
The speculative decoding tokens’ position offsets (shared by all sequences).
Shape: [batch_size, num_draft_tokens + 1].</p></li>
<li><p><strong>spec_decoding_packed_mask</strong> – <p>Tensor = None,
The speculative decoding tokens’ attention mask (packed into uint32_t bits).
remove_input_padding is False:</p>
<blockquote>
<div><p>Shape: [batch_size, num_draft_tokens + 1, divUp(num_draft_tokens + 1, 32)].</p>
</div></blockquote>
<dl class="simple">
<dt>remove_input_padding is True:</dt><dd><p>Shape: [sum(spec_decoding_generation_lengths), divUp(num_draft_tokens + 1, 32)].</p>
</dd>
</dl>
</p></li>
<li><p><strong>long_rope_rotary_inv_freq</strong> – float Tensor
Additional rotary inverse frequencies used for longer sequence lengths. Shape: [head_size / 2]</p></li>
<li><p><strong>long_rope_rotary_cos_sin</strong> – float2(cos/sin) Tensor
Additional rotary cos/sin cache used for longer sequence lengths.</p></li>
<li><p><strong>is_mla_enabled_flag</strong> – bool = False
Whether to enable DeepSeek-V2 MLA (multi-head latent attention).</p></li>
<li><p><strong>host_runtime_perf_knobs</strong> – Tensor = None,
The runtime perf knobs bit mask; controls whether certain perf knobs are used in the runtime.</p></li>
<li><p><strong>host_context_progress</strong> – Tensor = None,
The structure used to track layer-wise progress in the context phase.</p></li>
<li><p><strong>skip_attn</strong> – Tensor = None,
A bool tensor on CPU. If it is True, the attention plugin is not run and the function returns directly.</p></li>
<li><p><strong>num_kv_heads_origin</strong> – int
The original number of KV heads, before tensor-parallel splitting.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by that layer.</p>
</dd>
</dl>
</dd></dl>
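<p>As a quick orientation for the fused QKV layout described above, the following
minimal sketch shows how the last dimension qkv_dim of the QKV tensor is
typically derived for MHA/MQA/GQA. The helper name <code class="docutils literal notranslate"><span class="pre">qkv_dim</span></code> is
hypothetical and for illustration only; it assumes the common packing of
num_heads query heads plus 2 * num_kv_heads key/value heads.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
# Hypothetical helper, illustrating the usual [Q | K | V] head packing.
def qkv_dim(num_heads: int, num_kv_heads: int, head_size: int) -> int:
    # MHA: num_kv_heads == num_heads; MQA: num_kv_heads == 1;
    # GQA: num_kv_heads between 1 and num_heads.
    return (num_heads + 2 * num_kv_heads) * head_size

assert qkv_dim(32, 32, 128) == 12288  # MHA
assert qkv_dim(32, 8, 128) == 6144    # GQA: (32 + 16) * 128
assert qkv_dim(32, 1, 128) == 4352    # MQA: (32 + 2) * 128
</pre></div></div>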

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.group_norm">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">group_norm</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">num_groups</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1e-05</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#group_norm"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.group_norm" title="Link to this definition">#</a></dt>
<dd></dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.gt">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">gt</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">left:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">right:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ElementWiseOperation</span> <span class="pre">=</span> <span class="pre"><ElementWiseOperation.GREATER:</span> <span class="pre">12></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.gt" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation with two inputs.</p>
<p>For each input, that function first creates a constant tensor if the input
is an integer or a float. Then, if needed, it expands the smaller tensor to
make sure its rank is the same as the larger one. Then, it performs the
elementwise operation ‘op’.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>add for op=trt.ElementWiseOperation.SUM
sub for op=trt.ElementWiseOperation.SUB
mul for op=trt.ElementWiseOperation.PROD
div for op=trt.ElementWiseOperation.DIV
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
gt for op=trt.ElementWiseOperation.GREATER
lt for op=trt.ElementWiseOperation.LESS
op_and for op=trt.ElementWiseOperation.AND
op_or for op=trt.ElementWiseOperation.OR
eq for op=trt.ElementWiseOperation.EQUAL
minimum for op=trt.ElementWiseOperation.MIN
maximum for op=trt.ElementWiseOperation.MAX
pow for op=trt.ElementWiseOperation.POW</p>
</div></blockquote>
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>left</strong> – Union[Tensor, int, float]
The first input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>right</strong> – Union[Tensor, int, float]
The second input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>op</strong> – trt.ElementWiseOperation
The binary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
</dd></dl>
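<p>A minimal usage sketch for the comparison closures, assuming a TensorRT-LLM
network is being built with the usual Builder/net_guard pattern (tensor names
and shapes are arbitrary):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
import tensorrt_llm
from tensorrt_llm import Tensor
from tensorrt_llm.functional import gt

builder = tensorrt_llm.Builder()
network = builder.create_network()
with tensorrt_llm.net_guard(network):
    x = Tensor(name='x', shape=(2, 3),
               dtype=tensorrt_llm.str_dtype_to_trt('float32'))
    y = Tensor(name='y', shape=(2, 3),
               dtype=tensorrt_llm.str_dtype_to_trt('float32'))
    # Elementwise x > y; int/float operands would be promoted to
    # constant tensors, as described above.
    out = gt(x, y)
    out.mark_output('out')
</pre></div></div>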

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.identity">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">identity</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#identity"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.identity" title="Link to this definition">#</a></dt>
<dd><p>Add an identity operation.</p>
<p>TODO: Document why it can be done using a plugin!!!</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>input</strong> – Tensor
The input tensor.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this identity operation.</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.index_select">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">index_select</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">index</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#index_select"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.index_select" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to select slices of elements from a tensor.</p>
<p>Given an input tensor, that function creates an operation that selects the
slices of elements in the dimension ‘dim’ at the indices listed in ‘index’
to create a new tensor. The output tensor has the same rank as the input
tensor.</p>
<p>The ‘index’ is a tensor of rank 1.</p>
<p>For example, on input=[[4, 2, 5], [2, 1, 2], [4, 7, 1]], which has a shape
[3, 3],</p>
<blockquote>
<div><p>index_select(input, 0, [0, 1])</p>
</div></blockquote>
<p>will create a tensor of shape [2, 3] that contains [[4, 2, 5], [2, 1, 2]].</p>
<p>Regarding the shape of the output tensor, the dimension ‘dim’ has the same
size as the ‘index’ tensor. It means that for an input tensor of shape [4, 2, 6, 3],</p>
<blockquote>
<div><p>index_select(input, 2, [1, 4])</p>
</div></blockquote>
<p>will select the 2nd and 5th slices (index == 1 or 4) from the 3rd dimension
(dim == 2) and return a tensor of shape [4, 2, 2, 3] (i.e. the 3rd
dimension is shrunk to 2).</p>
<p>Note that this operation can also be used to expand a tensor in the ‘dim’
dimension, for example, on input [[0, 1], [2, 3]],</p>
<blockquote>
<div><p>index_select(input, 1, [0, 0, 0])</p>
</div></blockquote>
<p>will produce a tensor of shape [2, 3] containing [[0, 0, 0], [2, 2, 2]].</p>
<p>That operation maps to the TensorRT IGatherLayer.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor to select from.</p></li>
<li><p><strong>dim</strong> – int
The dimension to select from.</p></li>
<li><p><strong>index</strong> – Tensor
The indices of the slices in the ‘dim’ dimension to select.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor containing the selected slices.</p>
</dd>
</dl>
</dd></dl>
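<p>The first example above, written as a minimal network-building sketch
(assuming the usual Builder/net_guard setup):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
import numpy as np
import tensorrt_llm
from tensorrt_llm.functional import constant, index_select

builder = tensorrt_llm.Builder()
network = builder.create_network()
with tensorrt_llm.net_guard(network):
    data = constant(np.array([[4, 2, 5], [2, 1, 2], [4, 7, 1]],
                             dtype=np.float32))
    rows = constant(np.array([0, 1], dtype=np.int32))
    # Select rows 0 and 1 along dim 0 -> shape [2, 3]:
    # [[4, 2, 5], [2, 1, 2]].
    selected = index_select(data, 0, rows)
    selected.mark_output('selected')
</pre></div></div>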

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.int_clip">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">int_clip</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">lower</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">upper</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#int_clip"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.int_clip" title="Link to this definition">#</a></dt>
<dd></dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.interpolate">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">interpolate</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">scale_factor</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">float</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">mode</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'nearest'</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">align_corners</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">recompute_scale_factor</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">antialias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#interpolate"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.interpolate" title="Link to this definition">#</a></dt>
<dd></dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.is_gated_activation">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">is_gated_activation</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">activation</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#is_gated_activation"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.is_gated_activation" title="Link to this definition">#</a></dt>
<dd><p>Is a given activation function gated?</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>activation</strong> – str
The name of the activation function.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>True if the function is gated, False otherwise.</p>
</dd>
</dl>
</dd></dl>
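<p>For instance (a small sketch; the exact set of activation names registered
as gated depends on the library version):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from tensorrt_llm.functional import is_gated_activation

# Gated variants pair a linear branch with an activated gate branch.
print(is_gated_activation('swiglu'))  # expected: True
print(is_gated_activation('relu'))    # expected: False
</pre></div></div>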

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.layer_norm">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">layer_norm</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">normalized_shape</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1e-05</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">use_diff_of_squares</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#layer_norm"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.layer_norm" title="Link to this definition">#</a></dt>
<dd><p>Add a layer-norm operation on a tensor.</p>
<p>That operation applies the layer-normalization to its input tensor. In its
simplest form, for large language models, the ‘normalized_shape’ should be
set to the hidden dimension of the activation tensor. Otherwise, it is the
shape of the normalized fraction of the tensor (starting from the
right-most dimension).</p>
<p>The ‘weight’ tensor corresponds to ‘gamma’ in the layer-norm formula and
‘bias’ is ‘beta’. The ‘eps’ value is added to the variance before computing
the square root.</p>
<p>This implementation (when using the plugin) supports an additional flag to
enable/disable the use of a difference of squares (‘Var = Mean(X^2) -
Mean(X)^2’).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The tensor to normalize.</p></li>
<li><p><strong>normalized_shape</strong> – Union[int, Tuple[int]]
The shape of the sub-tensor that is normalized. Use ‘hidden_dim’ to
normalize the inner-most dimension of an activation tensor in LLMs.</p></li>
<li><p><strong>weight</strong> – Optional[Tensor] = None
The ‘gamma’ term in layer-norm. Its shape must be
‘normalized_shape’.</p></li>
<li><p><strong>bias</strong> – Optional[Tensor] = None
The ‘beta’ term in layer-norm. Its shape must be
‘normalized_shape’.</p></li>
<li><p><strong>eps</strong> – float
The epsilon term added to the variance before the square root is computed.</p></li>
<li><p><strong>use_diff_of_squares</strong> – bool
Does the plugin use the difference of squares to compute the
variance?</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The output tensor of that operation.</p>
</dd>
</dl>
</dd></dl>
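<p>A minimal sketch that normalizes the hidden dimension of an activation
tensor (assuming the usual Builder/net_guard setup; names and shapes are
arbitrary):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
import numpy as np
import tensorrt_llm
from tensorrt_llm import Tensor
from tensorrt_llm.functional import constant, layer_norm

hidden_dim = 64
builder = tensorrt_llm.Builder()
network = builder.create_network()
with tensorrt_llm.net_guard(network):
    x = Tensor(name='x', shape=(8, hidden_dim),
               dtype=tensorrt_llm.str_dtype_to_trt('float32'))
    gamma = constant(np.ones(hidden_dim, dtype=np.float32))   # 'weight'
    beta = constant(np.zeros(hidden_dim, dtype=np.float32))   # 'bias'
    # Normalize the inner-most (hidden) dimension.
    y = layer_norm(x, hidden_dim, weight=gamma, bias=beta, eps=1e-5)
    y.mark_output('y')
</pre></div></div>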

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.log">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">log</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.UnaryOperation</span> <span class="pre">=</span> <span class="pre"><UnaryOperation.LOG:</span> <span class="pre">1></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.log" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation on a single input.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>round for op=trt.UnaryOperation.ROUND
sqrt for op=trt.UnaryOperation.SQRT
exp for op=trt.UnaryOperation.EXP
sin for op=trt.UnaryOperation.SIN
cos for op=trt.UnaryOperation.COS
abs for op=trt.UnaryOperation.ABS
log for op=trt.UnaryOperation.LOG</p>
</div></blockquote>
<p>It is implemented using the IUnaryLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>op</strong> – trt.UnaryOperation
The unary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
</dd></dl>
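<p>The unary closures compose like ordinary functions; a minimal sketch
(assuming the usual Builder/net_guard setup):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
import tensorrt_llm
from tensorrt_llm import Tensor
from tensorrt_llm.functional import exp, log

builder = tensorrt_llm.Builder()
network = builder.create_network()
with tensorrt_llm.net_guard(network):
    x = Tensor(name='x', shape=(4,),
               dtype=tensorrt_llm.str_dtype_to_trt('float32'))
    # Each closure inserts one IUnaryLayer; log(exp(x)) == x
    # up to floating-point rounding.
    y = log(exp(x))
    y.mark_output('y')
</pre></div></div>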

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.log_softmax">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">log_softmax</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#log_softmax"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.log_softmax" title="Link to this definition">#</a></dt>
<dd><p>This function is equivalent to torch.nn.functional.log_softmax(), i.e.
it performs log(softmax(input)) in a safer and faster way.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The data tensor on which log_softmax is to be computed.</p></li>
<li><p><strong>dim</strong> – int
The dimension of the input tensor along which log_softmax will be computed.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A tensor of the same shape as the input, with log_softmax computed on the specified dim.</p>
</dd>
</dl>
</dd></dl>
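<p>A minimal sketch computing log-probabilities over the last dimension of a
logits tensor (assuming the usual Builder/net_guard setup):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
import tensorrt_llm
from tensorrt_llm import Tensor
from tensorrt_llm.functional import log_softmax

builder = tensorrt_llm.Builder()
network = builder.create_network()
with tensorrt_llm.net_guard(network):
    logits = Tensor(name='logits', shape=(2, 5),
                    dtype=tensorrt_llm.str_dtype_to_trt('float32'))
    # Numerically safer than computing log(softmax(logits)) separately.
    logprobs = log_softmax(logits, dim=1)
    logprobs.mark_output('logprobs')
</pre></div></div>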

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.lora_plugin">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">lora_plugin</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">in_hidden_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">out_hidden_sizes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">[0]</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">host_request_types</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">transa</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">transb</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">host_context_lengths</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_low_rank</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">lora_ranks</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">lora_weights_pointers</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight_index</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#lora_plugin"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.lora_plugin" title="Link to this definition">#</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor (On GPU)
The input tensor. Its shape is [batch_size, seq_len, dim], or [num_tokens, dim] when remove_input_padding is enabled,</p></li>
<li><p><strong>in_hidden_size/out_hidden_size</strong> – int
The LoRA computation workflow is
[M, in_hidden_size] -> [M, low_rank] -> [M, out_hidden_size],</p></li>
<li><p><strong>host_request_types</strong> – Tensor = None
The tensor on the host that indicates if a request is in context or
generation phase. Its shape is [batch_size]. See Inflight Batching
in docs/source/advanced/gpt-attention.md,</p></li>
<li><p><strong>transa</strong> – bool
Is the first input transposed? Set to ‘True’ if you want the first
input to be transposed, ‘False’ otherwise.</p></li>
<li><p><strong>transb</strong> – bool
Is the second input transposed? Set to ‘True’ if you want the
second input to be transposed, ‘False’ otherwise.</p></li>
<li><p><strong>host_context_lengths</strong> – cpu Tensor = None
A host tensor that contains the lengths of the different inputs,</p></li>
<li><p><strong>max_low_rank</strong> – int
Maximum low_rank, used to determine the workspace size.</p></li>
<li><p><strong>lora_ranks</strong> – cpu Tensor with shape [batch_size]
The low_rank of each request,</p></li>
<li><p><strong>lora_weights_pointers</strong> – cpu int64 Tensor with shape [batch_size, 3]
The weight pointers of each request. Consists of in_pointer, out_pointer, and possibly a scales vector pointer.</p></li>
<li><p><strong>weight_index</strong> – int
The index of the weight, if the weight pointer points to multiple weights.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by that layer.</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.low_latency_gemm">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">low_latency_gemm</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">mat2</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">alpha</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">ndarray</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">strict_dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataType</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#low_latency_gemm"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.low_latency_gemm" title="Link to this definition">#</a></dt>
<dd></dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.low_latency_gemm_swiglu">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">low_latency_gemm_swiglu</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">scale_d0</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1.0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">scale_d1</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1.0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">scale_output</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1.0</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#low_latency_gemm_swiglu"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.low_latency_gemm_swiglu" title="Link to this definition">#</a></dt>
<dd><p>Add a matrix multiplication, followed by a SwiGLU (<cite>x * SiLU(gate)</cite>) operation.</p>
<p>The SwiGLU operation takes the preceding GEMM output, splits it into two halves
along the last dimension, applies SiLU to the second half and multiplies the
results. The behaviour is undefined if the last dimension is not even.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The first tensor (often called A).</p></li>
<li><p><strong>weight</strong> – Tensor
The second tensor (often called B).</p></li>
<li><p><strong>scale_d0</strong> – float
The scale for dequantizing x, used for FP8.</p></li>
<li><p><strong>scale_d1</strong> – float
The scale for dequantizing gate, used for FP8.</p></li>
<li><p><strong>scale_output</strong> – float
The scale for quantizing the output, used for FP8.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the inserted layer.</p>
</dd>
</dl>
</dd></dl>
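<p>The SwiGLU part of the fused kernel can be described with a small NumPy
reference (a sketch of the math only; the plugin additionally applies the
FP8 scales described above):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
import numpy as np

def silu(x: np.ndarray) -> np.ndarray:
    return x / (1.0 + np.exp(-x))

def swiglu_reference(h: np.ndarray) -> np.ndarray:
    # Split the last (even-sized) dimension into x and gate halves,
    # then compute x * SiLU(gate).
    x, gate = np.split(h, 2, axis=-1)
    return x * silu(gate)

h = np.random.randn(4, 8).astype(np.float32)  # last dim must be even
out = swiglu_reference(h)                     # shape (4, 4)
</pre></div></div>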

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.lt">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">lt</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">left:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">right:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ElementWiseOperation</span> <span class="pre">=</span> <span class="pre"><ElementWiseOperation.LESS:</span> <span class="pre">13></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.lt" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation with two inputs.</p>
<p>For each input, that function first creates a constant tensor if the input
is an integer or a float. Then, if needed, it expands the smaller tensor to
make sure its rank is the same as the larger one. Then, it performs the
elementwise operation ‘op’.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>add for op=trt.ElementWiseOperation.SUM
sub for op=trt.ElementWiseOperation.SUB
mul for op=trt.ElementWiseOperation.PROD
div for op=trt.ElementWiseOperation.DIV
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
gt for op=trt.ElementWiseOperation.GREATER
lt for op=trt.ElementWiseOperation.LESS
op_and for op=trt.ElementWiseOperation.AND
op_or for op=trt.ElementWiseOperation.OR
eq for op=trt.ElementWiseOperation.EQUAL
minimum for op=trt.ElementWiseOperation.MIN
maximum for op=trt.ElementWiseOperation.MAX
pow for op=trt.ElementWiseOperation.POW</p>
</div></blockquote>
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>left</strong> – Union[Tensor, int, float]
The first input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>right</strong> – Union[Tensor, int, float]
The second input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>op</strong> – trt.ElementWiseOperation
The binary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
</dd></dl>
|
||
|
||
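<p>A minimal usage sketch for the elementwise closures (this assumes graph
construction happens inside a TensorRT-LLM network context such as
<cite>tensorrt_llm.net_guard</cite>; the host arrays are illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>import numpy as np
from tensorrt_llm.functional import constant, lt, maximum

x = constant(np.array([1.0, 4.0, 2.0], dtype=np.float32))
y = constant(np.array([3.0, 0.0, 2.0], dtype=np.float32))

mask = lt(x, y)            # elementwise x &lt; y, a boolean tensor
clamped = maximum(x, 2.0)  # the float is first turned into a constant tensor
</pre></div></div>
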
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.mamba_conv1d">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">mamba_conv1d</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">conv_state_or_ptr</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">conv_weight</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">conv_bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">host_request_types</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">last_token_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dconv</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">pre_stride</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">post_stride</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">host_context_lengths</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">slot_mapping</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">apply_silu</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#mamba_conv1d"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.mamba_conv1d" title="Link to this definition">#</a></dt>
|
||
<dd><dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor (On GPU)
|
||
The input tensor. Its shape is [batch_size, seq_len, dim] or [num_tokens, dim] for remove_input_padding</p></li>
|
||
<li><p><strong>conv_state_or_ptr</strong> – Tensor (On GPU or CPU)
|
||
The conv state tensor. Its shape is [batch_size, dconv - 1, dim]
|
||
Or the CPU tensor of shape [1] for the pointer of paged states.</p></li>
|
||
<li><p><strong>conv_weight</strong> – Tensor (On GPU)
|
||
The weight tensor. Its shape is [1, dconv, dim]</p></li>
|
||
<li><p><strong>conv_bias</strong> – Tensor (On GPU)
|
||
The bias tensor. Its shape is [dim]</p></li>
|
||
<li><p><strong>host_request_types</strong> – Tensor (On CPU)
|
||
The tensor on the host that indicates if a request is in context or
|
||
generation phase. Its shape is [batch_size]. See Inflight Batching
|
||
in docs/source/advanced/gpt-attention.md,</p></li>
|
||
<li><p><strong>last_token_ids</strong> – Tensor (On GPU)
|
||
The inclusive prefix-sum of the lengths or the lengths of the
|
||
sequences in the batch.</p></li>
|
||
<li><p><strong>dim</strong> – int
|
||
The hidden dimension of conv1d</p></li>
|
||
<li><p><strong>dconv</strong> – int
|
||
The window size of conv1d</p></li>
|
||
<li><p><strong>dtype</strong> – str
|
||
data type</p></li>
|
||
<li><p><strong>pre_stride</strong> – int = 0
|
||
The (pre) stride size of the input tensor.
|
||
The valid values of the input tensor are input[…, pre_stride: dim-post_stride]</p></li>
|
||
<li><p><strong>post_stride</strong> – int = 0
|
||
The (post) stride size of the input tensor.
|
||
The valid values of the input tensor are input[…, pre_stride: dim-post_stride]</p></li>
|
||
<li><p><strong>host_context_lengths</strong> – Tensor (On CPU) (Optional)
|
||
A host tensor that contains the lengths of the different inputs,</p></li>
|
||
<li><p><strong>slot_mapping</strong> – Tensor (On GPU) (Optional)
|
||
Real page index in state. Its shape is [dim], used for paged state, each page shape is [dconv, dim]</p></li>
|
||
<li><p><strong>apply_silu</strong> – bool
|
||
Is there a SiLU operation after the conv1d? When True apply
|
||
SiLU activation function after the conv1d.</p></li>
|
||
</ul>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
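<p>The shape conventions above can be summarized in a short, purely illustrative
snippet (the variable names are hypothetical and the sizes arbitrary):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre># Padded context phase (remove_input_padding disabled):
batch_size, seq_len, dim, dconv = 2, 16, 1024, 4

input_shape      = (batch_size, seq_len, dim)    # activations, on GPU
conv_state_shape = (batch_size, dconv - 1, dim)  # rolling window of past inputs
weight_shape     = (1, dconv, dim)               # one filter per channel
bias_shape       = (dim,)
</pre></div></div>
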
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.masked_scatter">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">masked_scatter</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">mask</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">source</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#masked_scatter"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.masked_scatter" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add the masked_scatter base on PyTorch definition.</p>
|
||
<p>See <a class="reference external" href="https://pytorch.org/docs/stable/generated/">https://pytorch.org/docs/stable/generated/</a><a href="#id9"><span class="problematic" id="id10">torch.Tensor.masked_scatter_</span></a>.html#torch-tensor-masked-scatter for a
|
||
description of that function.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor.</p></li>
|
||
<li><p><strong>mask</strong> – Tensor
|
||
The boolean mask tensor that indicates elements to select.</p></li>
|
||
<li><p><strong>source</strong> – Tensor
|
||
The tensor to copy from</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor containing the source tensor selected by mask.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
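<p>A small NumPy sketch of the PyTorch-style semantics being mirrored (source
values are consumed in row-major order of the True positions):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>import numpy as np

inp    = np.zeros((2, 3), dtype=np.int32)
mask   = np.array([[True, False, True], [False, True, False]])
source = np.array([10, 20, 30, 40], dtype=np.int32)

out = inp.copy()
out[mask] = source[:mask.sum()]
# out is [[10, 0, 20], [0, 30, 0]]
</pre></div></div>
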
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.masked_select">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">masked_select</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">mask</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#masked_select"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.masked_select" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an operation to select elements from a tensor according to a boolean
|
||
mask tensor.</p>
|
||
<p>Given an input tensor, that function creates an operation that selects
|
||
elements at the indices indicated by the boolean mask tensor to create
|
||
a new tensor. The output tensor is a 1-D tensor.</p>
|
||
<p>The input tensor must have rank >= 1. The shapes of the input tensor and
|
||
the mask tensor don’t need to match, but they must be able to be broadcasted.</p>
|
||
<p>For example, on input=[[4, 2, 5], [2, 1, 2], [4, 7, 1]], which has a shape
|
||
[3, 3],</p>
|
||
<blockquote>
|
||
<div><p>masked_select(input, [[True, False, True], [False, True, False], [True, False, True]])</p>
|
||
</div></blockquote>
|
||
<p>will create a tensor of shape [5] that contains the [4, 5, 1, 4, 1].</p>
|
||
<blockquote>
|
||
<div><p>masked_select(input, [[True], [False], [True]])</p>
|
||
</div></blockquote>
|
||
<p>will create a tensor of shape [6] that contains the [4, 2, 5, 4, 7, 1].</p>
|
||
<blockquote>
|
||
<div><p>masked_select(input, [[False, False, True]])</p>
|
||
</div></blockquote>
|
||
<p>will create a tensor of shape [3] that contains the [5, 2, 1].</p>
|
||
<blockquote>
|
||
<div><p>masked_select(input, [False])</p>
|
||
</div></blockquote>
|
||
<p>will create a tensor of shape [0] which is empty.</p>
|
||
<p>That operation is implemented by NonZero, Shuffle and GatherV2 layers
|
||
in TensorRT.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor to select from.</p></li>
|
||
<li><p><strong>mask</strong> – Tensor
|
||
The boolean mask tensor that indicates elements to select.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The 1-D tensor containing the selected elements.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
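<p>The first worked example above, written as graph code (a sketch assuming a
network-building context and that <cite>constant</cite> accepts these host arrays):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>import numpy as np
from tensorrt_llm.functional import constant, masked_select

inp  = constant(np.array([[4, 2, 5], [2, 1, 2], [4, 7, 1]], dtype=np.int32))
mask = constant(np.array([[True, False, True],
                          [False, True, False],
                          [True, False, True]]))

selected = masked_select(inp, mask)  # 1-D tensor holding [4, 5, 1, 4, 1]
</pre></div></div>
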
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.matmul">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">matmul</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">mat2</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">transa</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">transb</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">use_fp32_acc</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#matmul"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.matmul" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add a matrix multiplication.</p>
|
||
<p>That operation maps to a tensorrt.IMatrixMultiplyLayer layer. As explained
|
||
in the TensorRT documentation, it computes the inner product between the
|
||
two inputs after applying an optional transposition on the inputs.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The first tensor (often called A).</p></li>
|
||
<li><p><strong>mat2</strong> – Tensor
|
||
The second tensor (often called B).</p></li>
|
||
<li><p><strong>transa</strong> – bool
|
||
Is the first input transposed? Set to ‘True’ if you want the first
|
||
input to be transposed, ‘False’ otherwise.</p></li>
|
||
<li><p><strong>transb</strong> – bool
|
||
Is the second input transposed? Set to ‘True’ if you want the
|
||
second input to be transposed, ‘False’ otherwise.</p></li>
|
||
<li><p><strong>use_fp32_acc</strong> – bool
|
||
Set to ‘True’ if for accuracy reason, this fp16 matmul needs to use
|
||
fp32 accumulation. This can be a per model and per matmul decision.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by the inserted layer.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
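<p>A minimal usage sketch (assuming a network-building context; the shapes are illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>import numpy as np
from tensorrt_llm.functional import constant, matmul

a = constant(np.ones((8, 16), dtype=np.float16))   # [M, K]
b = constant(np.ones((32, 16), dtype=np.float16))  # [N, K]

# B is transposed by the layer, so the result has shape [8, 32].
# use_fp32_acc defaults to True for accuracy on fp16 inputs.
c = matmul(a, b, transb=True)
</pre></div></div>
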
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.max">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">max</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">keepdim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#max"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.max" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an operation to compute the max along a dimension.</p>
|
||
<p>Computes the max along the dimension ‘dim’ of the input tensor.</p>
|
||
<p>It is implemented using the IReduceLayer from TensorRT.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor.</p></li>
|
||
<li><p><strong>dim</strong> – int
|
||
The dimension along which the mean is computed.</p></li>
|
||
<li><p><strong>keepdim</strong> – bool
|
||
Is the dimension kept in the reduced tensor? When True the
|
||
dimension is kept, it is removed from the shape otherwise.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by this reduction operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
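<p>A short sketch of this reduction family (<cite>max</cite>, <cite>mean</cite>, <cite>min</cite>) on a 2-D tensor,
assuming a network-building context:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>import numpy as np
from tensorrt_llm.functional import constant, mean
from tensorrt_llm.functional import max as reduce_max  # shadows the builtin max

x = constant(np.array([[1.0, 5.0, 3.0],
                       [4.0, 2.0, 6.0]], dtype=np.float32))

row_max  = reduce_max(x, dim=1)          # shape [2], values [5.0, 6.0]
row_mean = mean(x, dim=1, keepdim=True)  # shape [2, 1], values [[3.0], [4.0]]
</pre></div></div>
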
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.maximum">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">maximum</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">left:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">right:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ElementWiseOperation</span> <span class="pre">=</span> <span class="pre"><ElementWiseOperation.MAX:</span> <span class="pre">2></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.maximum" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an elementwise operation with two inputs.</p>
|
||
<p>For each input, that function first creates a constant tensor if the input
|
||
is an integer or a float. Then, if needed, it expands the smaller tensor to
|
||
make sure its rank is the same as the larger one. Then, it performs the
|
||
elementwise operation ‘op’.</p>
|
||
<p>The following closures are defined in functional.*:</p>
|
||
<blockquote>
|
||
<div><p>add for op=trt.ElementWiseOperation.SUM
|
||
sub for op=trt.ElementWiseOperation.SUB
|
||
mul for op=trt.ElementWiseOperation.PROD
|
||
div for op=trt.ElementWiseOperation.DIV
|
||
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
|
||
gt for op=trt.ElementWiseOperation.GREATER
|
||
lt for op=trt.ElementWiseOperation.LESS
|
||
op_and for op=trt.ElementWiseOperation.AND
|
||
op_or for op=trt.ElementWiseOperation.OR
|
||
eq for op=trt.ElementWiseOperation.EQUAL
|
||
minimum for op=trt.ElementWiseOperation.MIN
|
||
maximum for op=trt.ElementWiseOperation.MAX
|
||
pow for op=trt.ElementWiseOperation.POW</p>
|
||
</div></blockquote>
|
||
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>left</strong> – Union[Tensor, int, float]
|
||
The first input. If that input is an integer or a float, the
|
||
function creates a constant tensor.</p></li>
|
||
<li><p><strong>right</strong> – Union[Tensor, int, float]
|
||
The second input. If that input is an integer or a float, the
|
||
function creates a constant tensor.</p></li>
|
||
<li><p><strong>op</strong> – trt.ElementWiseOperation
|
||
The binary operation to perform.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.mean">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">mean</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">keepdim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#mean"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.mean" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an operation to compute the mean along a dimension.</p>
|
||
<p>Computes the mean along the dimension ‘dim’ of the input tensor.</p>
|
||
<p>It is implemented using the IReduceLayer from TensorRT.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor.</p></li>
|
||
<li><p><strong>dim</strong> – int
|
||
The dimension along which the mean is computed.</p></li>
|
||
<li><p><strong>keepdim</strong> – bool
|
||
Is the dimension kept in the reduced tensor? When True the
|
||
dimension is kept, it is removed from the shape otherwise.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by this reduction operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.meshgrid2d">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">meshgrid2d</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">x</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">y</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#meshgrid2d"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.meshgrid2d" title="Link to this definition">#</a></dt>
|
||
<dd><p>Creates grids (2D) of coordinates specified by the 1D inputs (only supports <cite>indexing=’xy’</cite>).</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>x</strong> – Tensor
|
||
The first input (1D) tensor.</p></li>
|
||
<li><p><strong>y</strong> – Tensor
|
||
The second input (1D) tensor.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tuple of two tensors produced.</p>
|
||
</dd>
|
||
</dl>
|
||
<dl class="simple">
|
||
<dt>TODO: Add full support for torch.meshgrid.</dt><dd><p>See <a class="reference external" href="https://pytorch.org/docs/stable/generated/torch.meshgrid.html#torch-meshgrid">https://pytorch.org/docs/stable/generated/torch.meshgrid.html#torch-meshgrid</a></p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
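<p>The <cite>indexing=’xy’</cite> convention matches NumPy’s default; a quick reference sketch:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>import numpy as np

x = np.array([1, 2, 3])
y = np.array([10, 20])

# 'xy' indexing: both output grids have shape [len(y), len(x)].
gx, gy = np.meshgrid(x, y)
# gx is [[1, 2, 3], [1, 2, 3]]
# gy is [[10, 10, 10], [20, 20, 20]]
</pre></div></div>
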
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.min">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">min</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="pre">input:</span> <span class="pre">~tensorrt_llm.functional.Tensor,</span> <span class="pre">*,</span> <span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ReduceOperation</span> <span class="pre">=</span> <span class="pre"><ReduceOperation.MIN:</span> <span class="pre">3>,</span> <span class="pre">dim:</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">~typing.Tuple[int],</span> <span class="pre">keepdim:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.min" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an reduction operation to do along a dimension.</p>
|
||
<p>It is implemented using the IReduceLayer from TensorRT.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor.</p></li>
|
||
<li><p><strong>op</strong> – trt.ReduceOperation
|
||
The reduction operation to perform.
|
||
Options: SUM, PROD, MAX, MIN, AVG</p></li>
|
||
<li><p><strong>dim</strong> – int
|
||
The dimension along which the reduction is performed.</p></li>
|
||
<li><p><strong>keepdim</strong> – bool
|
||
Is the dimension kept in the reduced tensor? When True the
|
||
dimension is kept, it is removed from the shape otherwise.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by this reduction operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.minimum">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">minimum</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">left:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">right:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ElementWiseOperation</span> <span class="pre">=</span> <span class="pre"><ElementWiseOperation.MIN:</span> <span class="pre">3></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.minimum" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an elementwise operation with two inputs.</p>
|
||
<p>For each input, that function first creates a constant tensor if the input
|
||
is an integer or a float. Then, if needed, it expands the smaller tensor to
|
||
make sure its rank is the same as the larger one. Then, it performs the
|
||
elementwise operation ‘op’.</p>
|
||
<p>The following closures are defined in functional.*:</p>
|
||
<blockquote>
|
||
<div><p>add for op=trt.ElementWiseOperation.SUM
|
||
sub for op=trt.ElementWiseOperation.SUB
|
||
mul for op=trt.ElementWiseOperation.PROD
|
||
div for op=trt.ElementWiseOperation.DIV
|
||
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
|
||
gt for op=trt.ElementWiseOperation.GREATER
|
||
lt for op=trt.ElementWiseOperation.LESS
|
||
op_and for op=trt.ElementWiseOperation.AND
|
||
op_or for op=trt.ElementWiseOperation.OR
|
||
eq for op=trt.ElementWiseOperation.EQUAL
|
||
minimum for op=trt.ElementWiseOperation.MIN
|
||
maximum for op=trt.ElementWiseOperation.MAX
|
||
pow for op=trt.ElementWiseOperation.POW</p>
|
||
</div></blockquote>
|
||
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>left</strong> – Union[Tensor, int, float]
|
||
The first input. If that input is an integer or a float, the
|
||
function creates a constant tensor.</p></li>
|
||
<li><p><strong>right</strong> – Union[Tensor, int, float]
|
||
The second input. If that input is an integer or a float, the
|
||
function creates a constant tensor.</p></li>
|
||
<li><p><strong>op</strong> – trt.ElementWiseOperation
|
||
The binary operation to perform.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.modulo">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">modulo</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">x</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">y</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#modulo"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.modulo" title="Link to this definition">#</a></dt>
|
||
<dd><p>This function adds an element-wise modulo (x % y) operation for a given tensor.
|
||
Since there is no TensorRT layer that can directly perform this,
|
||
this function implements it using some of the basic operations.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>A tensor that represents (x % y) modulo operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
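<p>One way to express the decomposition into basic operations, using the closures
documented on this page (a sketch of the idea, not necessarily the exact
implementation):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm.functional import floordiv, mul, sub

def modulo_sketch(x, y):
    # x % y == x - floor(x / y) * y
    return sub(x, mul(floordiv(x, y), y))
</pre></div></div>
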
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.mul">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">mul</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">left:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">right:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ElementWiseOperation</span> <span class="pre">=</span> <span class="pre"><ElementWiseOperation.PROD:</span> <span class="pre">1></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.mul" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an elementwise operation with two inputs.</p>
|
||
<p>For each input, that function first creates a constant tensor if the input
|
||
is an integer or a float. Then, if needed, it expands the smaller tensor to
|
||
make sure its rank is the same as the larger one. Then, it performs the
|
||
elementwise operation ‘op’.</p>
|
||
<p>The following closures are defined in functional.*:</p>
|
||
<blockquote>
|
||
<div><p>add for op=trt.ElementWiseOperation.SUM
|
||
sub for op=trt.ElementWiseOperation.SUB
|
||
mul for op=trt.ElementWiseOperation.PROD
|
||
div for op=trt.ElementWiseOperation.DIV
|
||
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
|
||
gt for op=trt.ElementWiseOperation.GREATER
|
||
lt for op=trt.ElementWiseOperation.LESS
|
||
op_and for op=trt.ElementWiseOperation.AND
|
||
op_or for op=trt.ElementWiseOperation.OR
|
||
eq for op=trt.ElementWiseOperation.EQUAL
|
||
minimum for op=trt.ElementWiseOperation.MIN
|
||
maximum for op=trt.ElementWiseOperation.MAX
|
||
pow for op=trt.ElementWiseOperation.POW</p>
|
||
</div></blockquote>
|
||
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>left</strong> – Union[Tensor, int, float]
|
||
The first input. If that input is an integer or a float, the
|
||
function creates a constant tensor.</p></li>
|
||
<li><p><strong>right</strong> – Union[Tensor, int, float]
|
||
The second input. If that input is an integer or a float, the
|
||
function creates a constant tensor.</p></li>
|
||
<li><p><strong>op</strong> – trt.ElementWiseOperation
|
||
The binary operation to perform.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.non_gated_version">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">non_gated_version</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">activation</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#non_gated_version"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.non_gated_version" title="Link to this definition">#</a></dt>
|
||
<dd><p>Given an activation function, get the non-gated version.</p>
|
||
<p>If the activation function is non-gated, it returns the same activation
|
||
function name.</p>
|
||
<p>For example, that function returns ‘silu’ for ‘swiglu’ and ‘relu’ for
|
||
‘relu’.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p><strong>activation</strong> – str
|
||
The name of the activation function.</p>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The name of the non-gated activation function.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
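<p>Usage is a plain string mapping, per the example above:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm.functional import non_gated_version

non_gated_version('swiglu')  # returns 'silu'
non_gated_version('relu')    # returns 'relu' (already non-gated)
</pre></div></div>
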
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.nonzero">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">nonzero</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#nonzero"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.nonzero" title="Link to this definition">#</a></dt>
|
||
<dd><p>Adds a layer that finds the indices of non-zero values of the input tensor.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p><strong>input</strong> – Tensor
|
||
The input tensor for which we need to find the indices of non-zero values.</p>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>A tensor of shape [D, C] where D is the number of dimensions of <cite>input</cite> and
|
||
C is the number of non-zero values in it.
|
||
Each column of this 2D tensor represents the index tuple for each non-zero value.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
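<p>To make the [D, C] layout concrete, a NumPy analogue (TensorRT’s NonZero
returns one column per non-zero value, i.e. the transpose of <cite>np.argwhere</cite>):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>import numpy as np

x = np.array([[0, 3], [4, 0]])
idx = np.argwhere(x).T  # shape [D, C] = [2, 2]
# idx[0] holds the row indices [0, 1] and idx[1] the column
# indices [1, 0] of the non-zero values 3 and 4.
</pre></div></div>
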
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.not_op">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">not_op</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.UnaryOperation</span> <span class="pre">=</span> <span class="pre"><UnaryOperation.NOT:</span> <span class="pre">20></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.not_op" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an elementwise operation on a single input.</p>
|
||
<p>The following closures are defined in functional.*:</p>
|
||
<blockquote>
|
||
<div><p>round for op=trt.UnaryOperation.ROUND
|
||
sqrt for op=trt.UnaryOperation.SQRT
|
||
exp for op=trt.UnaryOperation.EXP
|
||
sin for op=trt.UnaryOperation.SIN
|
||
cos for op=trt.UnaryOperation.COS
|
||
abs for op=trt.UnaryOperation.ABS
|
||
log for op=trt.UnaryOperation.LOG</p>
|
||
</div></blockquote>
|
||
<p>It is implemented using the IUnaryLayer from TensorRT.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor.</p></li>
|
||
<li><p><strong>op</strong> – trt.UnaryOperation
|
||
The unary operation to perform.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.op_and">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">op_and</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">left:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">right:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ElementWiseOperation</span> <span class="pre">=</span> <span class="pre"><ElementWiseOperation.AND:</span> <span class="pre">8></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.op_and" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an elementwise operation with two inputs.</p>
|
||
<p>For each input, that function first creates a constant tensor if the input
|
||
is an integer or a float. Then, if needed, it expands the smaller tensor to
|
||
make sure its rank is the same as the larger one. Then, it performs the
|
||
elementwise operation ‘op’.</p>
|
||
<p>The following closures are defined in functional.*:</p>
|
||
<blockquote>
|
||
<div><p>add for op=trt.ElementWiseOperation.SUM
|
||
sub for op=trt.ElementWiseOperation.SUB
|
||
mul for op=trt.ElementWiseOperation.PROD
|
||
div for op=trt.ElementWiseOperation.DIV
|
||
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
|
||
gt for op=trt.ElementWiseOperation.GREATER
|
||
lt for op=trt.ElementWiseOperation.LESS
|
||
op_and for op=trt.ElementWiseOperation.AND
|
||
op_or for op=trt.ElementWiseOperation.OR
|
||
eq for op=trt.ElementWiseOperation.EQUAL
|
||
minimum for op=trt.ElementWiseOperation.MIN
|
||
maximum for op=trt.ElementWiseOperation.MAX
|
||
pow for op=trt.ElementWiseOperation.POW</p>
|
||
</div></blockquote>
|
||
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>left</strong> – Union[Tensor, int, float]
|
||
The first input. If that input is an integer or a float, the
|
||
function creates a constant tensor.</p></li>
|
||
<li><p><strong>right</strong> – Union[Tensor, int, float]
|
||
The second input. If that input is an integer or a float, the
|
||
function creates a constant tensor.</p></li>
|
||
<li><p><strong>op</strong> – trt.ElementWiseOperation
|
||
The binary operation to perform.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.op_or">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">op_or</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">left:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">right:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ElementWiseOperation</span> <span class="pre">=</span> <span class="pre"><ElementWiseOperation.OR:</span> <span class="pre">9></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.op_or" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an elementwise operation with two inputs.</p>
|
||
<p>For each input, that function first creates a constant tensor if the input
|
||
is an integer or a float. Then, if needed, it expands the smaller tensor to
|
||
make sure its rank is the same as the larger one. Then, it performs the
|
||
elementwise operation ‘op’.</p>
|
||
<p>The following closures are defined in functional.*:</p>
|
||
<blockquote>
|
||
<div><p>add for op=trt.ElementWiseOperation.SUM
|
||
sub for op=trt.ElementWiseOperation.SUB
|
||
mul for op=trt.ElementWiseOperation.PROD
|
||
div for op=trt.ElementWiseOperation.DIV
|
||
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
|
||
gt for op=trt.ElementWiseOperation.GREATER
|
||
lt for op=trt.ElementWiseOperation.LESS
|
||
op_and for op=trt.ElementWiseOperation.AND
|
||
op_or for op=trt.ElementWiseOperation.OR
|
||
eq for op=trt.ElementWiseOperation.EQUAL
|
||
minimum for op=trt.ElementWiseOperation.MIN
|
||
maximum for op=trt.ElementWiseOperation.MAX
|
||
pow for op=trt.ElementWiseOperation.POW</p>
|
||
</div></blockquote>
|
||
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>left</strong> – Union[Tensor, int, float]
|
||
The first input. If that input is an integer or a float, the
|
||
function creates a constant tensor.</p></li>
|
||
<li><p><strong>right</strong> – Union[Tensor, int, float]
|
||
The second input. If that input is an integer or a float, the
|
||
function creates a constant tensor.</p></li>
|
||
<li><p><strong>op</strong> – trt.ElementWiseOperation
|
||
The binary operation to perform.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
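<p>Example (an illustrative sketch, not generated from the library source; it assumes <cite>m0</cite> and <cite>m1</cite> are boolean <cite>Tensor</cite> objects built inside an active TensorRT-LLM network definition):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# Elementwise logical OR of two boolean masks; an int/float operand
# would be promoted to a constant tensor and ranks broadcast as needed.
either_mask = F.op_or(m0, m1)
</pre></div></div>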

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.op_xor">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">op_xor</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">left:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">right:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ElementWiseOperation</span> <span class="pre">=</span> <span class="pre"><ElementWiseOperation.XOR:</span> <span class="pre">10></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.op_xor" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation with two inputs.</p>
<p>For each input, that function first creates a constant tensor if the input
is an integer or a float. Then, if needed, it expands the smaller tensor to
make sure its rank is the same as the larger one. Then, it performs the
elementwise operation ‘op’.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>add for op=trt.ElementWiseOperation.SUM
sub for op=trt.ElementWiseOperation.SUB
mul for op=trt.ElementWiseOperation.PROD
div for op=trt.ElementWiseOperation.DIV
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
gt for op=trt.ElementWiseOperation.GREATER
lt for op=trt.ElementWiseOperation.LESS
op_and for op=trt.ElementWiseOperation.AND
op_or for op=trt.ElementWiseOperation.OR
eq for op=trt.ElementWiseOperation.EQUAL
minimum for op=trt.ElementWiseOperation.MIN
maximum for op=trt.ElementWiseOperation.MAX
pow for op=trt.ElementWiseOperation.POW</p>
</div></blockquote>
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>left</strong> – Union[Tensor, int, float]
The first input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>right</strong> – Union[Tensor, int, float]
The second input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>op</strong> – trt.ElementWiseOperation
The binary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
</dd></dl>
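<p>Example (illustrative; same assumptions as for <cite>op_or</cite> above, with <cite>m0</cite> and <cite>m1</cite> boolean tensors in an active network):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# True exactly where one of the two masks is set, but not both.
exactly_one = F.op_xor(m0, m1)
</pre></div></div>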

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.outer">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">outer</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">vec2</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#outer"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.outer" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to compute the outer product between two tensors.</p>
<p>That operation creates an Einsum node.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The first input tensor.</p></li>
<li><p><strong>vec2</strong> – Tensor
The second input tensor.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The output tensor produced by this layer.</p>
</dd>
</dl>
</dd></dl>
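<p>Example (an illustrative sketch; <cite>u</cite> and <cite>v</cite> are assumed to be 1-D tensors of lengths m and n defined in an active network):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# Outer product of a length-m vector and a length-n vector -> shape [m, n],
# lowered to a single Einsum node.
table = F.outer(u, v)
</pre></div></div>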

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.pad">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">pad</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">pad</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">mode</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'constant'</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">value</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#pad"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.pad" title="Link to this definition">#</a></dt>
<dd><p>Add a pad layer.</p>
<p>The padding layer adds constant padding (zero by default) at the start and end of the
input tensor. The sizes by which to pad some dimensions of the input are described
starting from the last dimension and moving forward.</p>
<p><cite>[len(pad) / 2]</cite> dimensions of input will be padded. For example, to pad only the last
dimension of the input tensor, pad has the form [padding_left, padding_right]; to
pad the last 2 dimensions of the input tensor, use [padding_left, padding_right,
padding_top, padding_bottom]; to pad the last 3 dimensions, use [padding_left,
padding_right, padding_top, padding_bottom, padding_front, padding_back].</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor on which the padding is performed.</p></li>
<li><p><strong>pad</strong> – sequence of int
An m-element sequence describing the padding, where m must be even
and satisfy m <= 2 * (number of input dimensions).</p></li>
<li><p><strong>mode</strong> – str
Only ‘constant’ is supported.</p></li>
<li><p><strong>value</strong> – float
Fill value for ‘constant’ padding. Default: 0.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the inserted layer.</p>
</dd>
</dl>
</dd></dl>
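<p>Example (illustrative; <cite>x</cite> is assumed to be a 2-D tensor of shape [8, 16] in an active network):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# The pad list walks backwards from the last dimension:
# [left, right, top, bottom]. The last dim grows by 1 + 3 and the
# first dim by 0 + 2, so the result has shape [10, 20].
y = F.pad(x, pad=[1, 3, 0, 2], mode='constant', value=0.0)
</pre></div></div>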

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.permute">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">permute</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dims</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#permute"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.permute" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to permute the dimensions of a tensor.</p>
<p>The dimensions of the input tensor are permuted according to the sequence
of dimensions in ‘dims’. That operation maps to tensorrt.IShuffleLayer where
the second transposition is described by the indices in ‘dims’.</p>
<p>Given a tensor of rank N, the result of the permutation is a tensor of rank
N in which the i-th input dimension maps to the dims[i]-th dimension.</p>
<p>For example, permute(input, [1, 0]) will transpose a 2D tensor by permuting
the rows and columns.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor to permute.</p></li>
<li><p><strong>dims</strong> – Sequence[int]
The description of the permutation.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the permutation layer.</p>
</dd>
</dl>
</dd></dl>
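<p>Example (illustrative; <cite>x</cite> is assumed to be a 4-D activation tensor of shape [batch, seq, heads, head_dim]):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# Swap the sequence and head dimensions:
# [batch, seq, heads, head_dim] -> [batch, heads, seq, head_dim].
y = F.permute(x, dims=[0, 2, 1, 3])
</pre></div></div>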

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.pow">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">pow</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">left:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">right:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ElementWiseOperation</span> <span class="pre">=</span> <span class="pre"><ElementWiseOperation.POW:</span> <span class="pre">6></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.pow" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation with two inputs.</p>
<p>For each input, that function first creates a constant tensor if the input
is an integer or a float. Then, if needed, it expands the smaller tensor to
make sure its rank is the same as the larger one. Then, it performs the
elementwise operation ‘op’.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>add for op=trt.ElementWiseOperation.SUM
sub for op=trt.ElementWiseOperation.SUB
mul for op=trt.ElementWiseOperation.PROD
div for op=trt.ElementWiseOperation.DIV
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
gt for op=trt.ElementWiseOperation.GREATER
lt for op=trt.ElementWiseOperation.LESS
op_and for op=trt.ElementWiseOperation.AND
op_or for op=trt.ElementWiseOperation.OR
eq for op=trt.ElementWiseOperation.EQUAL
minimum for op=trt.ElementWiseOperation.MIN
maximum for op=trt.ElementWiseOperation.MAX
pow for op=trt.ElementWiseOperation.POW</p>
</div></blockquote>
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>left</strong> – Union[Tensor, int, float]
The first input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>right</strong> – Union[Tensor, int, float]
The second input. If that input is an integer or a float, the
function creates a constant tensor.</p></li>
<li><p><strong>op</strong> – trt.ElementWiseOperation
The binary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
</dd></dl>
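<p>Example (illustrative; <cite>x</cite> is a floating-point tensor in an active network):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# Square a tensor; the Python scalar 2.0 is first materialized as a
# constant tensor, then the elementwise POW is applied.
x_squared = F.pow(x, 2.0)
</pre></div></div>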

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.prod">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">prod</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="pre">input:</span> <span class="pre">~tensorrt_llm.functional.Tensor,</span> <span class="pre">*,</span> <span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ReduceOperation</span> <span class="pre">=</span> <span class="pre"><ReduceOperation.PROD:</span> <span class="pre">1>,</span> <span class="pre">dim:</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">~typing.Tuple[int],</span> <span class="pre">keepdim:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.prod" title="Link to this definition">#</a></dt>
<dd><p>Add a reduction operation along a dimension.</p>
<p>It is implemented using the IReduceLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>op</strong> – trt.ReduceOperation
The reduction operation to perform.
Options: SUM, PROD, MAX, MIN, AVG</p></li>
<li><p><strong>dim</strong> – int
The dimension along which the reduction is performed.</p></li>
<li><p><strong>keepdim</strong> – bool
Whether the reduced dimension is kept in the output tensor. When
True, the dimension is kept; otherwise it is removed from the shape.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this reduction operation.</p>
</dd>
</dl>
</dd></dl>
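<p>Example (illustrative; <cite>x</cite> is assumed to have shape [batch, k]):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# Multiply across dim 1 -> shape [batch];
# keepdim=True would give shape [batch, 1] instead.
p = F.prod(x, dim=1)
</pre></div></div>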

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.quick_gelu">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">quick_gelu</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">x</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#quick_gelu"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.quick_gelu" title="Link to this definition">#</a></dt>
<dd></dd></dl>
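<p>The entry above carries no docstring. For reference, quick GELU is commonly defined elsewhere as the sigmoid approximation x·sigmoid(1.702·x); whether this implementation matches that definition is an assumption here. A usage sketch (illustrative; <cite>h</cite> is a hidden-state tensor in an active network):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# Apply the quick-GELU activation elementwise.
y = F.quick_gelu(h)
</pre></div></div>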

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.rand">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">rand</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">shape</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">low</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">high</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">DataType</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'float32'</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#rand"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.rand" title="Link to this definition">#</a></dt>
<dd><p>This operation adds a fill layer that generates a random (uniform) tensor with the specified shape and data type.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>shape</strong> – Tensor
The shape of the tensor to be generated.</p></li>
<li><p><strong>low</strong> – float
The minimum value (inclusive) of the random range.</p></li>
<li><p><strong>high</strong> – float
The maximum value (inclusive) of the random range.</p></li>
<li><p><strong>dtype</strong> – Union[str, trt.DataType]
The desired data type for the output tensor.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The generated random tensor produced by the fill layer.</p>
</dd>
</dl>
</dd></dl>
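<p>Example (illustrative; <cite>shape_t</cite> is assumed to be an int32 shape tensor already computed in the network, e.g. from another tensor’s runtime shape):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# Fill a tensor of the given (possibly dynamic) shape with uniform
# random float32 values drawn between low and high.
noise = F.rand(shape_t, low=0.0, high=1.0, dtype='float32')
</pre></div></div>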

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.rearrange">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">rearrange</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">inputs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">expression</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>,</dd>
<dd><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#rearrange"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.rearrange" title="Link to this definition">#</a></dt>
<dd><p>Add a rearrange operation on a tensor.</p>
<p>This operation is a reader-friendly smart element reordering for multidimensional tensors,
including functionality of transpose (axes permutation), reshape (view), squeeze, unsqueeze,
stack, concatenate and other operations. Please see: <a class="reference external" href="https://einops.rocks/api/rearrange/">https://einops.rocks/api/rearrange/</a></p>
<dl class="simple">
<dt>For example, if the shape of the input tensor is [32, 30, 40, 3], then running:</dt><dd><p><cite>rearrange(x, ‘b (h h1) (w w1) c -> b h w 1 (c h1 w1) 1’, h1=2, w1=2)</cite></p>
</dd>
</dl>
<p>produces a tensor of shape [32, 15, 20, 1, 12, 1].</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>inputs</strong> – Union[Tensor, Sequence[Tensor]]
If it is a tensor, the operation is applied to it directly.
If it is a sequence, the tensors are first concatenated into a
single tensor, which the operation is then applied to.</p></li>
<li><p><strong>expression</strong> – str
The expression describing, in a reader-friendly way, how to reorder the tensor.</p></li>
<li><p><strong>kwargs</strong> – Keyword arguments that bind identifiers in the expression to specific values.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The output tensor of this operation.</p>
</dd>
</dl>
</dd></dl>
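<p>The docstring example above, written out as code (illustrative; <cite>x</cite> has shape [32, 30, 40, 3]):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# Fold each 2x2 spatial patch into the channel dimension:
# [32, 30, 40, 3] -> [32, 15, 20, 1, 12, 1].
y = F.rearrange(x, 'b (h h1) (w w1) c -> b h w 1 (c h1 w1) 1', h1=2, w1=2)
</pre></div></div>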

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.recv">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">recv</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">tensor</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">src</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#recv"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.recv" title="Link to this definition">#</a></dt>
<dd><p>Add an operation that receives a tensor on one rank from another.</p>
<p>The recv operation receives a tensor on a rank from another rank. If a rank ‘i’
receives a tensor from a rank ‘j’, the rank ‘j’ must have a corresponding ‘send’
operation to rank ‘i’. See ‘send’.</p>
<p>That operation is implemented using a plugin that wraps the NCCL recv
point-to-point operation. See
<a class="reference external" href="https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/p2p.html#ncclrecv">https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/p2p.html#ncclrecv</a>
for details.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>tensor</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>src</strong> – int
The rank that sends the tensor.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by that layer.</p>
</dd>
</dl>
</dd></dl>
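<p>Example (illustrative; <cite>t</cite> is a tensor defined on both ranks, and the matching ‘send’ on the peer rank is implied by the description above rather than shown):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# On rank 1: receive the tensor from rank 0. Rank 0 must issue the
# corresponding 'send' to rank 1, or the NCCL exchange will not complete.
received = F.recv(t, src=0)
</pre></div></div>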

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.reduce">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">reduce</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">ReduceOperation</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">keepdim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#reduce"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.reduce" title="Link to this definition">#</a></dt>
<dd><p>Add a reduction operation along a dimension.</p>
<p>It is implemented using the IReduceLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>op</strong> – trt.ReduceOperation
The reduction operation to perform.
Options: SUM, PROD, MAX, MIN, AVG</p></li>
<li><p><strong>dim</strong> – int
The dimension along which the reduction is performed.</p></li>
<li><p><strong>keepdim</strong> – bool
Whether the reduced dimension is kept in the output tensor. When
True, the dimension is kept; otherwise it is removed from the shape.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this reduction operation.</p>
</dd>
</dl>
</dd></dl>
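<p>Example (illustrative; <cite>x</cite> is assumed to have shape [batch, seq, hidden]):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>import tensorrt as trt
from tensorrt_llm import functional as F

# Mean over the hidden dimension, keeping it with size 1:
# [batch, seq, hidden] -> [batch, seq, 1].
m = F.reduce(x, op=trt.ReduceOperation.AVG, dim=2, keepdim=True)
</pre></div></div>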

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.reduce_scatter">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">reduce_scatter</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">tensor</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">group</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#reduce_scatter"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.reduce_scatter" title="Link to this definition">#</a></dt>
<dd></dd></dl>
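<p>The entry above carries no docstring. A usage sketch by analogy with the other collectives on this page (illustrative; it assumes standard reduce-scatter semantics, a 4-way tensor-parallel group, and that <cite>x</cite> holds this rank’s partial sums):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# Reduce across the four ranks and scatter one shard of the result
# back to each rank.
shard = F.reduce_scatter(x, group=[0, 1, 2, 3])
</pre></div></div>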

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.relu">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">relu</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">act_type:</span> <span class="pre">~tensorrt_bindings.tensorrt.ActivationType</span> <span class="pre">=</span> <span class="pre"><ActivationType.RELU:</span> <span class="pre">0></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.relu" title="Link to this definition">#</a></dt>
<dd><p>Add an activation function.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor on which the activation function is applied.</p></li>
<li><p><strong>act_type</strong> – trt.ActivationType
The type of the activation (RELU, TANH, SIGMOID, …).</p></li>
</ul>
</dd>
</dl>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>relu for op=trt.ActivationType.RELU
tanh for op=trt.ActivationType.TANH
sigmoid for op=trt.ActivationType.SIGMOID</p>
</div></blockquote>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>The tensor produced by the activation layer.</p>
</dd>
</dl>
</dd></dl>
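<p>Example (illustrative; <cite>h</cite> is a hidden-state tensor in an active network):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# Elementwise ReLU; the same activation layer backs the tanh and
# sigmoid closures with a different act_type.
y = F.relu(h)
</pre></div></div>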

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.repeat">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">repeat</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">sizes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#repeat"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.repeat" title="Link to this definition">#</a></dt>
<dd><p>Repeats the tensor along the specified dimensions.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The tensor to be repeated.</p></li>
<li><p><strong>sizes</strong> – Sequence[int]
The number of times to repeat the tensor along each dimension.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A tensor containing the input repeated along each dimension the specified number of times.</p>
</dd>
</dl>
</dd></dl>
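<p>Example (illustrative; <cite>x</cite> is assumed to have shape [2, 3]):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# Tile twice along dim 0 and three times along dim 1: [2, 3] -> [4, 9].
y = F.repeat(x, sizes=[2, 3])
</pre></div></div>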

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.repeat_interleave">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">repeat_interleave</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">tensor</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#repeat_interleave"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.repeat_interleave" title="Link to this definition">#</a></dt>
<dd><p>Repeats elements of a tensor along an axis.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>repeats</strong> – int
The number of repetitions along the specified axis.</p></li>
<li><p><strong>dim</strong> – int
The dimension along which repetitions are performed.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A tensor with the same shape as the input except for the repeated elements along the specified dim.</p>
</dd>
</dl>
<p>TODO: Allow repeats to be a list of integers and dim to be unspecified.</p>
</dd></dl>
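<p>Example (illustrative; <cite>kv</cite> is assumed to have shape [batch, num_kv_heads, seq, head_dim], as when expanding grouped-query KV heads):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm import functional as F

# Repeat each KV head 4 times along dim 1:
# [batch, num_kv_heads, ...] -> [batch, 4 * num_kv_heads, ...].
kv_expanded = F.repeat_interleave(kv, repeats=4, dim=1)
</pre></div></div>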

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.rg_lru">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">rg_lru</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">A</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">state_or_ptr</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">host_request_types</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">last_token_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">block_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">y</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">y_bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">gate</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">gate_bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">gate_x</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">gate_x_bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">gate_a</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">gate_a_bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">slot_mapping</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#rg_lru"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.rg_lru" title="Link to this definition">#</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor (On GPU)
The input tensor. Its shape is [batch_size, seq_len, dim]</p></li>
<li><p><strong>A</strong> – Tensor (On GPU)
The A matrix. Its shape is [dim]</p></li>
<li><p><strong>state_or_ptr</strong> – Tensor (On GPU or CPU)
The lru state tensor. Its shape is [batch_size, dstate, dim]
Or the CPU tensor of shape [1] for the pointer of paged states.</p></li>
<li><p><strong>host_request_types</strong> – Tensor (On CPU)
The tensor on the host that indicates if a request is in context or
generation phase. Its shape is [batch_size]. See Inflight Batching
in docs/source/advanced/gpt-attention.md.</p></li>
<li><p><strong>last_token_ids</strong> – Tensor (On GPU)
The inclusive prefix-sum of the lengths or the lengths of the
sequences in the batch.</p></li>
<li><p><strong>dim</strong> – int
The inner dimension of the RG_LRU block.</p></li>
<li><p><strong>block_size</strong> – int
The block size of the block diagonal linear layer. It is used to
support the cases that enable the fused gate.</p></li>
<li><p><strong>dtype</strong> – str
The data type.</p></li>
<li><p><strong>y</strong> – Tensor (On GPU) (Optional)
The y tensor. Its shape is [batch_size, seq_len, dim]</p></li>
<li><p><strong>y_bias</strong> – Tensor (On GPU) (Optional)
The y_bias tensor. Its shape is [dim]. If y_bias is not None, we
will fuse GELU(y + y_bias) in this function.</p></li>
<li><p><strong>gate</strong> – Tensor (On GPU) (Optional)
The gate tensor. Its shape is [batch_size, seq_len, 2 * dim].
If gate is not None, we will fuse gate_x and gate_a in this
function; otherwise, those two tensors are used.</p></li>
<li><p><strong>gate_bias</strong> – Tensor (On GPU) (Optional)
The gate_bias tensor. Its shape is [2 * block_num, dim // block_num].
If gate_bias is not None, we will fuse the bias add in this function.</p></li>
<li><p><strong>gate_x</strong> – Tensor (On GPU) (Optional)
The gate_x tensor. Its shape is [batch_size, seq_len, dim]</p></li>
<li><p><strong>gate_x_bias</strong> – Tensor (On GPU) (Optional)
The gate_x_bias tensor. Its shape is [block_num, dim // block_num].
If gate_x_bias is not None, we will fuse the bias add in this function.</p></li>
<li><p><strong>gate_a</strong> – Tensor (On GPU) (Optional)
The gate_a tensor. Its shape is [batch_size, seq_len, dim]</p></li>
<li><p><strong>gate_a_bias</strong> – Tensor (On GPU) (Optional)
The gate_a_bias tensor. Its shape is [block_num, dim // block_num].
If gate_a_bias is not None, we will fuse the bias add in this function.</p></li>
<li><p><strong>slot_mapping</strong> – Tensor (On GPU) (Optional)
The real page index in the state. Its shape is [dim]; used for the
paged state, where each page has shape [dstate, dim]</p></li>
</ul>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.rms_norm">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">rms_norm</span></span><span class="sig-paren">(</span>

<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">normalized_shape</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">num_groups</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1e-06</span></span></em>,</dd>
</dl>

<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#rms_norm"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.rms_norm" title="Link to this definition">#</a></dt>
<dd><p>Add an RMS norm operation on a tensor.</p>
<p>That operation applies the rms-normalization to its input tensor. In its
simplest form, for large language models, the ‘normalized_shape’ should be
set to the hidden dimension of the activation tensor. Otherwise, it is the
shape of the normalized fraction of the tensor (starting from the
right-most dimension).</p>
<p>The ‘weight’ tensor corresponds to ‘gamma’ in the rms-norm formula.
The ‘eps’ value is added to the variance before computing the square root.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The tensor to normalize.</p></li>
<li><p><strong>normalized_shape</strong> – Union[int, Tuple[int]]
The shape of the sub-tensor that is normalized. Use ‘hidden_dim’ to
normalize the inner-most dimension of an activation tensor in LLMs.</p></li>
<li><p><strong>num_groups</strong> – int = 1
The group size.</p></li>
<li><p><strong>weight</strong> – Optional[Tensor] = None
The ‘gamma’ term in layer-norm. Its shape must be
‘normalized_shape’.</p></li>
<li><p><strong>eps</strong> – float
The epsilon term added to the variance before the square root is computed.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The output tensor of that operation.</p>
</dd>
</dl>
</dd></dl>
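<p>A minimal usage sketch follows; it assumes the usual TensorRT-LLM
graph-building setup (<cite>Builder</cite>, <cite>net_guard</cite>), and the shapes,
tensor names, and use of <cite>constant</cite> inputs are illustrative rather than
prescriptive:</p>
<div class="highlight"><pre>
# Hypothetical usage sketch: normalize the hidden dimension of an
# activation tensor inside a TensorRT-LLM network under construction.
import numpy as np
import tensorrt_llm
from tensorrt_llm.functional import constant, rms_norm

net = tensorrt_llm.Builder().create_network()
with tensorrt_llm.net_guard(net):
    x = constant(np.random.rand(2, 8, 64).astype(np.float32))  # [batch, seq, hidden]
    gamma = constant(np.ones(64, dtype=np.float32))            # shape == normalized_shape
    y = rms_norm(x, normalized_shape=64, weight=gamma, eps=1e-6)
    y.mark_output('y')
</pre></div>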

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.round">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">round</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.UnaryOperation</span> <span class="pre">=</span> <span class="pre">&lt;UnaryOperation.ROUND:</span> <span class="pre">22&gt;</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.round" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation on a single input.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>round for op=trt.UnaryOperation.ROUND
sqrt for op=trt.UnaryOperation.SQRT
exp for op=trt.UnaryOperation.EXP
sin for op=trt.UnaryOperation.SIN
cos for op=trt.UnaryOperation.COS
abs for op=trt.UnaryOperation.ABS
log for op=trt.UnaryOperation.LOG</p>
</div></blockquote>
<p>It is implemented using the IUnaryLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>op</strong> – trt.UnaryOperation
The unary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
</dd></dl>
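<p>A minimal sketch of the unary closures, under the same illustrative
graph-building setup as the earlier sketch; the chained calls and input
values are illustrative, and <cite>sqrt</cite> and <cite>exp</cite> are sibling closures
from the list above:</p>
<div class="highlight"><pre>
# Hypothetical usage sketch: each call adds one IUnaryLayer to the graph.
import numpy as np
import tensorrt_llm
from tensorrt_llm.functional import constant, exp, round, sqrt

net = tensorrt_llm.Builder().create_network()
with tensorrt_llm.net_guard(net):
    x = constant(np.array([0.4, 2.5, 9.0], dtype=np.float32))
    y = exp(sqrt(round(x)))  # round, then sqrt, then exp, elementwise
    y.mark_output('y')
</pre></div>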

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.scatter">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">scatter</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">indices</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">updates</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#scatter"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.scatter" title="Link to this definition">#</a></dt>
<dd><p>This operation adds a layer that creates an output tensor by element-wise
copying values from the input tensor and then updating values given by the
<cite>indices</cite> and <cite>updates</cite> tensors.</p>
<p>For a 2D input tensor, it first copies the input to the output,
then updates the output tensor like the following for each entry in <cite>updates</cite>:</p>
<blockquote>
<div><p>output[indices[i][j]][j] = updates[i][j] if dim=0
output[i][indices[i][j]] = updates[i][j] if dim=1</p>
</div></blockquote>
<p>If the <cite>input</cite> tensor is [[1, 2, 3], [4, 5, 6]],
the indices tensor is [[1, 2], [0, 1]],
the updates tensor is [[-1, -2], [-3, -4]], and dim=1,
the output tensor will be [[1, -1, -2], [-3, -4, 6]].</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input data that needs to be updated.</p></li>
<li><p><strong>dim</strong> – int
The axis on which the scatter is to be performed.</p></li>
<li><p><strong>indices</strong> – Tensor
An integer tensor of the same rank as input that indicates the positions to be updated.</p></li>
<li><p><strong>updates</strong> – Tensor
A data tensor of the same shape as the <cite>indices</cite> tensor that contains the update values.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A tensor created by the element-wise scatter layer.</p>
</dd>
</dl>
</dd></dl>
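<p>A minimal sketch that mirrors the worked example above, under the same
illustrative graph-building setup:</p>
<div class="highlight"><pre>
# Hypothetical usage sketch for scatter along dim=1.
import numpy as np
import tensorrt_llm
from tensorrt_llm.functional import constant, scatter

net = tensorrt_llm.Builder().create_network()
with tensorrt_llm.net_guard(net):
    data = constant(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32))
    idx = constant(np.array([[1, 2], [0, 1]], dtype=np.int32))
    upd = constant(np.array([[-1, -2], [-3, -4]], dtype=np.float32))
    out = scatter(data, dim=1, indices=idx, updates=upd)
    # expected contents after execution: [[1, -1, -2], [-3, -4, 6]]
    out.mark_output('out')
</pre></div>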

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.scatter_nd">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">scatter_nd</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">mask</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">source</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#scatter_nd"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.scatter_nd" title="Link to this definition">#</a></dt>
<dd><p>Scatter_nd is a tensor operation that writes or updates values in a tensor based on indices.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor to be updated.</p></li>
<li><p><strong>mask</strong> – Tensor
A tensor of indices specifying the locations in data to be updated.</p></li>
<li><p><strong>source</strong> – Tensor
A tensor of values to be written or scattered into data.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>New tensor with the same shape as the input tensor data,
where the values from the source tensor are scattered or written into the output tensor
at the locations specified by the mask tensor.</p>
</dd>
</dl>
</dd></dl>
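<p>A hedged sketch, assuming the index layout follows TensorRT’s ScatterND
semantics (each row of <cite>mask</cite> is the coordinate of one element of
<cite>input</cite> to overwrite); that layout is an assumption, not something the
description above confirms:</p>
<div class="highlight"><pre>
# Hypothetical usage sketch: overwrite two elements of a [4, 3] tensor.
# Index layout is assumed to be ScatterND-style: one coordinate per row.
import numpy as np
import tensorrt_llm
from tensorrt_llm.functional import constant, scatter_nd

net = tensorrt_llm.Builder().create_network()
with tensorrt_llm.net_guard(net):
    data = constant(np.zeros((4, 3), dtype=np.float32))
    mask = constant(np.array([[0, 1], [2, 0]], dtype=np.int32))  # element coordinates
    src = constant(np.array([10.0, 20.0], dtype=np.float32))     # one value per coordinate
    out = scatter_nd(data, mask, src)
    out.mark_output('out')
</pre></div>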

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.select">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">select</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">index</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#select"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.select" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to select a slice of elements from a tensor.</p>
<p>Given an input tensor, that function creates an operation that selects the
index-th slice of elements in the dimension ‘dim’ to create a new tensor.
The output tensor has a shape in which the input dimension ‘dim’ is
removed.</p>
<p>The ‘index’ can either be an integer or a 1D tensor containing a single
element.</p>
<p>For example, on input=[[4, 2, 5], [2, 1, 2], [4, 7, 1]], which has a shape
[3, 3],</p>
<blockquote>
<div><p>select(input, 0, 1)</p>
</div></blockquote>
<p>will create a tensor of shape [3] that contains [2, 1, 2].</p>
<p>Regarding the shape of the output tensor, the dimension ‘dim’ is removed.
It means that for a tensor of shape [4, 2, 6, 3],</p>
<blockquote>
<div><p>select(input, 2, 4)</p>
</div></blockquote>
<p>will select the 5th slice (index == 4) from the 3rd dimension (dim == 2)
and return a tensor of shape [4, 2, 3] (i.e. the 3rd dimension is removed).</p>
<p>That operation maps to the TensorRT IGatherLayer.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor to select from.</p></li>
<li><p><strong>dim</strong> – int
The dimension to select from.</p></li>
<li><p><strong>index</strong> – Union[Tensor, int]
The index of the slice in the ‘dim’ dimension to select.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor containing the selected slice.</p>
</dd>
</dl>
</dd></dl>
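<p>A minimal sketch of the first worked example above, under the same
illustrative graph-building setup:</p>
<div class="highlight"><pre>
# Hypothetical usage sketch: take row 1 of a [3, 3] tensor.
import numpy as np
import tensorrt_llm
from tensorrt_llm.functional import constant, select

net = tensorrt_llm.Builder().create_network()
with tensorrt_llm.net_guard(net):
    x = constant(np.array([[4, 2, 5], [2, 1, 2], [4, 7, 1]], dtype=np.int32))
    row = select(x, dim=0, index=1)  # shape [3], contents [2, 1, 2]
    row.mark_output('row')
</pre></div>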

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.selective_scan">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">selective_scan</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">state_or_ptr</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">delta</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">delta_bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">A</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">BC</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">D</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">host_request_types</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">last_token_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dstate</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dt_rank</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">delta_softplus</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">z</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">host_context_lengths</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">slot_mapping</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">nheads</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">ngroups</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">chunk_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">256</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">mamba_version</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'Mamba1'</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#selective_scan"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.selective_scan" title="Link to this definition">#</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor (On GPU)
The input tensor. Its shape is [batch_size, seq_len, dim].</p></li>
<li><p><strong>state_or_ptr</strong> – Tensor (On GPU or CPU)
The SSM state tensor. Its shape is [batch_size, dstate, dim].
Or the CPU tensor of shape [1] for the pointer of paged states.</p></li>
<li><p><strong>delta</strong> – Tensor (On GPU)
The delta tensor.
mamba: Its shape is [batch_size, seq_len, dim] or [num_tokens, dim] for remove_input_padding
mamba2: Its shape is [batch_size, seq_len, nheads] or [num_tokens, nheads] for remove_input_padding</p></li>
<li><p><strong>delta_bias</strong> – Tensor (On GPU)
The delta bias tensor.
mamba: Its shape is [dim]
mamba2: Its shape is [nheads]</p></li>
<li><p><strong>A</strong> – Tensor (On GPU)
A matrix.
mamba: Its shape is [dstate, dim]
mamba2: Its shape is [nheads]</p></li>
<li><p><strong>BC</strong> – Tensor (On GPU)
B and C matrix.
mamba: Its shape is [batch_size, seq_len, dstate * 2] or [num_tokens, dstate * 2] for remove_input_padding
mamba2: Its shape is [batch_size, seq_len, ngroups * dstate * 2] or [num_tokens, ngroups * dstate * 2] for remove_input_padding</p></li>
<li><p><strong>D</strong> – Tensor (On GPU)
D matrix.
mamba: Its shape is [dim]
mamba2: Its shape is [nheads]</p></li>
<li><p><strong>host_request_types</strong> – Tensor (On CPU)
The tensor on the host that indicates if a request is in context or
generation phase. Its shape is [batch_size]. See Inflight Batching
in docs/source/advanced/gpt-attention.md.</p></li>
<li><p><strong>last_token_ids</strong> – Tensor (On GPU)
The inclusive prefix-sum of the lengths or the lengths of the
sequences in the batch.</p></li>
<li><p><strong>dim</strong> – int
The inner dimension of the SSM block.</p></li>
<li><p><strong>dstate</strong> – int
The state dimension of the SSM block.</p></li>
<li><p><strong>dt_rank</strong> – int
The rank dimension of dt_proj.</p></li>
<li><p><strong>delta_softplus</strong> – bool
Whether to apply softplus to the delta.</p></li>
<li><p><strong>dtype</strong> – str
The data type.</p></li>
<li><p><strong>z</strong> – Tensor (On GPU) (Optional)
The z tensor. Its shape is [batch_size, seq_len, dim] or [num_tokens, dim] for remove_input_padding.</p></li>
<li><p><strong>host_context_lengths</strong> – Tensor (On CPU) (Optional)
A host tensor that contains the lengths of the different inputs.</p></li>
<li><p><strong>slot_mapping</strong> – Tensor (On GPU) (Optional)
The real page index in the state. Its shape is [dim]; used for the paged
state, where each page has shape [dstate, dim].</p></li>
<li><p><strong>nheads</strong> – int (Optional)
The number of heads.</p></li>
<li><p><strong>ngroups</strong> – int (Optional)
The number of groups.</p></li>
<li><p><strong>chunk_size</strong> – int (Optional)
The chunk size used by the chunk_scan kernel.</p></li>
<li><p><strong>mamba_version</strong> – str (Optional)
The Mamba version; ‘Mamba1’ is the default.</p></li>
</ul>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.send">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">send</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">tensor</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tgt</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#send"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.send" title="Link to this definition">#</a></dt>
<dd><p>Add an operation that performs a send from one rank to another.</p>
<p>The send operation sends a tensor from one rank to another. If a rank ‘i’
sends a tensor to a rank ‘j’, the rank ‘j’ must have a corresponding ‘recv’
operation from rank ‘i’. See ‘recv’.</p>
<p>That operation is implemented using a plugin that wraps the NCCL send
point-to-point operation. See
<a class="reference external" href="https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/p2p.html#ncclsend">https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/p2p.html#ncclsend</a>
for details.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>tensor</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>tgt</strong> – int
The rank that receives the tensor.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by that layer.</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.shape">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">shape</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">cast_to_dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">DataType</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">clip_before_cast</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#shape"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.shape" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to create a shape tensor.</p>
<p>The shape tensor can either be the shape of the input tensor when the
parameter dim is None, or a scalar (tensor of rank 0) that corresponds to
the size of the dim-th dimension.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor from which we want to extract the shape or the
size in one dimension.</p></li>
<li><p><strong>dim</strong> – Optional[int]
The dimension from which to extract the size. If it is None, the
entire shape of the input tensor is returned.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A tensor that contains the shape of the input tensor (if ‘dim’ is None)
or the size in the dimension ‘dim’ of the input tensor. If ‘dim’ is
‘None’, that tensor is a 1-D tensor whose number of elements equals the
rank of the input tensor; otherwise, its rank is 0.</p>
</dd>
</dl>
</dd></dl>
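<p>A minimal sketch, under the same illustrative graph-building setup; the
returned tensors are graph tensors whose values materialize only when the
engine executes:</p>
<div class="highlight"><pre>
# Hypothetical usage sketch: full shape vs. one dimension's size.
import numpy as np
import tensorrt_llm
from tensorrt_llm.functional import constant, shape

net = tensorrt_llm.Builder().create_network()
with tensorrt_llm.net_guard(net):
    x = constant(np.zeros((3, 5), dtype=np.float32))
    full = shape(x)       # 1-D tensor holding [3, 5]
    d1 = shape(x, dim=1)  # rank-0 tensor holding 5
    full.mark_output('full')
    d1.mark_output('d1')
</pre></div>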

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.sigmoid">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">sigmoid</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">act_type:</span> <span class="pre">~tensorrt_bindings.tensorrt.ActivationType</span> <span class="pre">=</span> <span class="pre">&lt;ActivationType.SIGMOID:</span> <span class="pre">1&gt;</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.sigmoid" title="Link to this definition">#</a></dt>
<dd><p>Add an activation function.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor on which the activation function is applied.</p></li>
<li><p><strong>act_type</strong> – trt.ActivationType
The type of the activation (RELU, TANH, SIGMOID, …).</p></li>
</ul>
</dd>
</dl>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>relu for op=trt.ActivationType.RELU
tanh for op=trt.ActivationType.TANH
sigmoid for op=trt.ActivationType.SIGMOID</p>
</div></blockquote>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>The tensor produced by the activation layer.</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.silu">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">silu</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#silu"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.silu" title="Link to this definition">#</a></dt>
<dd><p>Add a SiLU (<cite>x * sigmoid(x)</cite>) operation.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>input</strong> – Tensor
The input tensor on which the activation function is applied.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the activation layer.</p>
</dd>
</dl>
</dd></dl>
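<p>A minimal sketch, under the same illustrative graph-building setup:</p>
<div class="highlight"><pre>
# Hypothetical usage sketch: silu(x) computes x * sigmoid(x).
import numpy as np
import tensorrt_llm
from tensorrt_llm.functional import constant, silu

net = tensorrt_llm.Builder().create_network()
with tensorrt_llm.net_guard(net):
    x = constant(np.array([-1.0, 0.0, 1.0], dtype=np.float32))
    y = silu(x)
    y.mark_output('y')
</pre></div>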

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.sin">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">sin</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.UnaryOperation</span> <span class="pre">=</span> <span class="pre">&lt;UnaryOperation.SIN:</span> <span class="pre">6&gt;</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.sin" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation on a single input.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>round for op=trt.UnaryOperation.ROUND
sqrt for op=trt.UnaryOperation.SQRT
exp for op=trt.UnaryOperation.EXP
sin for op=trt.UnaryOperation.SIN
cos for op=trt.UnaryOperation.COS
abs for op=trt.UnaryOperation.ABS
log for op=trt.UnaryOperation.LOG</p>
</div></blockquote>
<p>It is implemented using the IUnaryLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>op</strong> – trt.UnaryOperation
The unary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.slice">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">slice</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">starts</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">sizes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">strides</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">mode</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">SampleMode</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">fill_value</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#slice"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.slice" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to extract a slice from a tensor.</p>
<p>As described in the TensorRT documentation of the ISliceLayer, the slice
layer has two variants: static and dynamic.</p>
<p>For static slicing, this function takes the starts and sizes values in the
different dimensions to slice at layer creation time via a sequence of
integers. For dynamic slicing, it accepts starts and sizes as
<cite>tensorrt.ITensor</cite> objects.</p>
<p>The slice layer selects for each dimension a start location from within the
input tensor, and copies elements to the output tensor using a stride of 1
across the input tensor. Start and size tensors must be 1-D int32 shape
tensors if not specified as a sequence of integers.</p>
<p>As an example, on input = [[0, 2, 4], [1, 3, 5]], the call to</p>
<blockquote>
<div><p>slice(input, start=[1, 0], size=[1, 2])</p>
</div></blockquote>
<p>will produce the tensor [[1, 3]] as output. The slice operator when
executed by TensorRT will copy one row (because size[0] == 1) starting from
the 2nd row (because start[0] == 1) and two columns (size[1] == 2) starting
from the 1st column (because start[1] == 0).</p>
<p>In pseudo-code the behavior of that operation can be described as follows
for a 2D tensor (and can easily be extended to more dimensions):</p>
<div class="highlight"><pre>
output = Tensor(shape=sizes)
for ii in range(sizes[0]):
    for jj in range(sizes[1]):
        output[ii][jj] = input[starts[0]+ii][starts[1]+jj]
</pre></div>
<p>Note that it is common in deep-learning frameworks to use ranges
[start:end] for similar operations. It can be emulated by setting the sizes
argument such that in each dimension [start:start+size] == [start:end] i.e.
size = end-start.</p>
<p>TensorRT supports different slice modes but that function restricts that
choice to <cite>mode == tensorrt.SampleMode.STRICT_BOUNDS</cite>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor on which the slicing is performed.</p></li>
<li><p><strong>starts</strong> – Union[Tensor, Sequence[int]]
The starting point in each dimension of the input tensor.</p></li>
<li><p><strong>sizes</strong> – Union[Tensor, Sequence[int]]
The number of elements in each dimension of the sliced tensor (output).</p></li>
<li><p><strong>strides</strong> – Union[Tensor, Sequence[int]]
The step to be taken from start, in each dimension of the input tensor.</p></li>
<li><p><strong>mode</strong> – trt.SampleMode
The mode that controls how the slice operation handles out of bounds coordinates.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by the slice layer.</p>
</dd>
</dl>
</dd></dl>
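<p>A minimal sketch of the worked example above, under the same illustrative
graph-building setup:</p>
<div class="highlight"><pre>
# Hypothetical usage sketch: copy 1 row x 2 columns starting at [1, 0].
import numpy as np
import tensorrt_llm
from tensorrt_llm.functional import constant, slice

net = tensorrt_llm.Builder().create_network()
with tensorrt_llm.net_guard(net):
    x = constant(np.array([[0, 2, 4], [1, 3, 5]], dtype=np.int32))
    y = slice(x, starts=[1, 0], sizes=[1, 2])  # expected contents: [[1, 3]]
    y.mark_output('y')
</pre></div>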

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.softmax">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">softmax</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#softmax"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.softmax" title="Link to this definition">#</a></dt>
<dd><p>Add an operation to compute softmax on a tensor.</p>
<p>That operation computes the softmax on the input tensor in the dimension
‘dim’ if specified. Otherwise, it is applied on the last dimension.</p>
<p>It inserts an ISoftmaxLayer into the TensorRT graph.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor on which to apply softmax.</p></li>
<li><p><strong>dim</strong> – Optional[int]
The dimension used to apply softmax.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The output tensor of the softmax layer.</p>
</dd>
</dl>
</dd></dl>
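<p>A minimal sketch, under the same illustrative graph-building setup:</p>
<div class="highlight"><pre>
# Hypothetical usage sketch: softmax over the last dimension of a [1, 3] tensor.
import numpy as np
import tensorrt_llm
from tensorrt_llm.functional import constant, softmax

net = tensorrt_llm.Builder().create_network()
with tensorrt_llm.net_guard(net):
    logits = constant(np.array([[1.0, 2.0, 3.0]], dtype=np.float32))
    probs = softmax(logits, dim=1)  # each row sums to 1
    probs.mark_output('probs')
</pre></div>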

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.softplus">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">softplus</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">beta</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">threshold</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#softplus"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.softplus" title="Link to this definition">#</a></dt>
<dd><p>Add the softplus activation, based on the PyTorch definition.</p>
<p>See <a class="reference external" href="https://pytorch.org/docs/stable/generated/torch.nn.functional.softplus.html#torch-nn-functional-softplus">https://pytorch.org/docs/stable/generated/torch.nn.functional.softplus.html#torch-nn-functional-softplus</a> for a
description of that function.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
Input TensorRT-LLM Tensor.</p></li>
<li><p><strong>beta</strong> – float
The parameter for the softplus computation.</p></li>
<li><p><strong>threshold</strong> – float
The threshold for reverting to the linear function when input * beta > threshold.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The output tensor created by that layer.</p>
</dd>
</dl>
</dd></dl>
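<p>A minimal sketch, under the same illustrative graph-building setup; the
<cite>beta</cite> and <cite>threshold</cite> values below mirror the defaults of
<cite>torch.nn.functional.softplus</cite>:</p>
<div class="highlight"><pre>
# Hypothetical usage sketch: softplus with beta=1 and threshold=20.
import numpy as np
import tensorrt_llm
from tensorrt_llm.functional import constant, softplus

net = tensorrt_llm.Builder().create_network()
with tensorrt_llm.net_guard(net):
    x = constant(np.array([-2.0, 0.0, 2.0], dtype=np.float32))
    y = softplus(x, beta=1.0, threshold=20.0)
    y.mark_output('y')
</pre></div>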

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.split">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">split</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">tensor</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">split_size_or_sections</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#split"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.split" title="Link to this definition">#</a></dt>
<dd><p>Add an operation that splits a tensor into sub-tensors.</p>
<p>This operation creates a list of tensors that are obtained from the input
tensor by slicing it along the dimension ‘dim’. If ‘split_size_or_sections’
is an integer, the tensor is split into ‘input.shape[dim] /
split_size_or_sections’ slices. If ‘split_size_or_sections’ is a list of
sizes, the tensor is split into ‘len(split_size_or_sections)’ slices and
the size of the ith slice is given by ‘split_size_or_sections[i]’.</p>
<p>There are several constraints with the current implementation:</p>
<blockquote>
<div><ul class="simple">
<li><p>The input tensor must be static (no dynamic dimension),</p></li>
<li><p>If ‘split_size_or_sections’ is an integer, the number of elements in
the ‘dim’ dimension of the input must be a multiple of
‘split_size_or_sections’: ‘input.shape[dim] % split_size_or_sections == 0’.</p></li>
<li><p>If ‘split_size_or_sections’ is a sequence, the sum of the elements in
‘split_size_or_sections’ must be equal to the size in the dimension
‘dim’: ‘input.shape[dim] == sum(ii for ii in split_size_or_sections)’.</p></li>
</ul>
</div></blockquote>
<p>That operation is implemented using a ‘slice’ operation for each output
slice.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>tensor</strong> – Tensor
The input tensor to slice.</p></li>
<li><p><strong>split_size_or_sections</strong> – Union[int, Sequence[int]]
If it is an integer, it encodes the size of each slice. Otherwise,
if it is a sequence, it contains the size of each individual slice.</p></li>
<li><p><strong>dim</strong> – int
The dimension of the tensor to slice.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The list of tensors produced by the different slice operations.</p>
</dd>
</dl>
</dd></dl>
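<p>A minimal sketch of both calling conventions, under the same illustrative
graph-building setup; the constraints listed above apply:</p>
<div class="highlight"><pre>
# Hypothetical usage sketch: split a [4, 6] tensor along dim=1.
import numpy as np
import tensorrt_llm
from tensorrt_llm.functional import constant, split

net = tensorrt_llm.Builder().create_network()
with tensorrt_llm.net_guard(net):
    x = constant(np.zeros((4, 6), dtype=np.float32))
    a, b, c = split(x, 2, dim=1)    # three [4, 2] slices (6 % 2 == 0)
    p, q = split(x, [4, 2], dim=1)  # a [4, 4] slice and a [4, 2] slice
    a.mark_output('a')
</pre></div>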

<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.functional.sqrt">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">sqrt</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">input:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.UnaryOperation</span> <span class="pre">=</span> <span class="pre">&lt;UnaryOperation.SQRT:</span> <span class="pre">2&gt;</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.sqrt" title="Link to this definition">#</a></dt>
<dd><p>Add an elementwise operation on a single input.</p>
<p>The following closures are defined in functional.*:</p>
<blockquote>
<div><p>round for op=trt.UnaryOperation.ROUND
sqrt for op=trt.UnaryOperation.SQRT
exp for op=trt.UnaryOperation.EXP
sin for op=trt.UnaryOperation.SIN
cos for op=trt.UnaryOperation.COS
abs for op=trt.UnaryOperation.ABS
log for op=trt.UnaryOperation.LOG</p>
</div></blockquote>
<p>It is implemented using the IUnaryLayer from TensorRT.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>input</strong> – Tensor
The input tensor.</p></li>
<li><p><strong>op</strong> – trt.UnaryOperation
The unary operation to perform.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
</dd>
</dl>
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.squared_relu">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">squared_relu</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">x</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#squared_relu"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.squared_relu" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add a Squared ReLU operation.</p>
|
||
<p>This function applies ReLU and squares the output.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p><strong>input</strong> – Tensor
|
||
The input tensor on which the activation function is applied.</p>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by the activation layer.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.squeeze">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">squeeze</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">zero_is_placeholder</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#squeeze"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.squeeze" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an operation to remove singleton dimensions of a tensor.</p>
|
||
<p>This functions creates an operation that removes singleton dimension
|
||
(dimension of size 1) at positions ‘dim’ in the input tensor. It works with
|
||
negative values for the ‘dim’.</p>
|
||
<p>For example, for a tensor ‘input’ of shape [1, 4, 1, 4]:</p>
|
||
<blockquote>
|
||
<div><p>squeeze(input, 0) will produce an output of shape [4, 1, 4],
|
||
squeeze(input, 2) will produce an output of shape [1, 4, 4],
|
||
squeeze(input, [0, 2]) will produce an output of shape [4, 4],
|
||
squeeze(input, [-2]) will produce an output of shape [1, 4, 4],</p>
|
||
</div></blockquote>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor for which the singleton dimensions will be removed.</p></li>
|
||
<li><p><strong>dim</strong> – Union[int, Sequence[int]]
|
||
The index of the singleton dimensions in the input tensor.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by the layer.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.stack">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">stack</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">inputs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">]</span></span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#stack"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.stack" title="Link to this definition">#</a></dt>
|
||
<dd><blockquote>
|
||
<div><p>Add an operation to contact input tensors along a new dimension.</p>
|
||
<p>The function creates an operation that creates a new dim for all the
|
||
input tensors and then concatenates them along that new dim.</p>
|
||
</div></blockquote>
|
||
<p>.</p>
|
||
<blockquote>
|
||
<div><p>All the tensors in ‘inputs’ must have the same shape.</p>
|
||
<blockquote>
|
||
<div><dl class="simple">
|
||
<dt>for ii in range(inputs[0].rank()):</dt><dd><p>assert all(inp.shape[ii] == inputs[0].shape[ii] for inp in inputs)</p>
|
||
</dd>
|
||
</dl>
|
||
</div></blockquote>
|
||
<p>The shape of the output tensor is defined as:</p>
|
||
<blockquote>
|
||
<div><p>output.rank() = inputs[0].rank() + 1</p>
|
||
<p>output.shape[dim] = len(inputs)</p>
|
||
<dl class="simple">
|
||
<dt>for ii in range(inputs[0].rank()):</dt><dd><dl class="simple">
|
||
<dt>if ii < dim:</dt><dd><p>output.shape[ii] = inputs[0].shape[ii]</p>
|
||
</dd>
|
||
<dt>else:</dt><dd><p>output.shape[ii+1] = inputs[0].shape[ii]</p>
|
||
</dd>
|
||
</dl>
|
||
</dd>
|
||
</dl>
|
||
</div></blockquote>
|
||
<p>For example, given a sequence of two 2D tensors [[0, 1], [2, 3]] and
|
||
[[4, 5], [6, 7]] both of shape [2, 2],</p>
|
||
<blockquote>
|
||
<div><p>stack(inputs, 0)</p>
|
||
</div></blockquote>
|
||
<p>will produce [[[0, 1], [2, 3]], [[4, 5], [6, 7]]] of shape [2, 2, 2] and</p>
|
||
<blockquote>
|
||
<div><p>stack(inputs, 1)</p>
|
||
</div></blockquote>
|
||
<p>will produce [[[0, 1], [4, 5]], [[2, 3], [6, 7]]] of shape [2, 2, 2].</p>
|
||
<dl class="simple">
|
||
<dt>Parameters:</dt><dd><dl class="simple">
|
||
<dt>inputs<span class="classifier">Sequence[Tensor]</span></dt><dd><p>The sequence of tensors to stack.</p>
|
||
</dd>
|
||
<dt>dim<span class="classifier">int</span></dt><dd><p>The dimension in which the stack is performed.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd>
|
||
<dt>Returns:</dt><dd><p>A tensor that contains the input tensors stacked along a new dimension.</p>
|
||
</dd>
|
||
</dl>
|
||
</div></blockquote>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.sub">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">sub</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">left:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">right:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span> <span class="pre">|</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">float</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">op:</span> <span class="pre">~tensorrt_bindings.tensorrt.ElementWiseOperation</span> <span class="pre">=</span> <span class="pre"><ElementWiseOperation.SUB:</span> <span class="pre">4></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.sub" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an elementwise operation with two inputs.</p>
|
||
<p>For each input, that function first creates a constant tensor if the input
|
||
is an integer or a float. Then, if needed, it expands the smaller tensor to
|
||
make sure its rank is the same as the larger one. Then, it performs the
|
||
elementwise operation ‘op’.</p>
|
||
<p>The following closures are defined in functional.*:</p>
|
||
<blockquote>
|
||
<div><p>add for op=trt.ElementWiseOperation.SUM
|
||
sub for op=trt.ElementWiseOperation.SUB
|
||
mul for op=trt.ElementWiseOperation.PROD
|
||
div for op=trt.ElementWiseOperation.DIV
|
||
floordiv for op=trt.ElementWiseOperation.FLOOR_DIV
|
||
gt for op=trt.ElementWiseOperation.GREATER
|
||
lt for op=trt.ElementWiseOperation.LESS
|
||
op_and for op=trt.ElementWiseOperation.AND
|
||
op_or for op=trt.ElementWiseOperation.OR
|
||
eq for op=trt.ElementWiseOperation.EQUAL
|
||
minimum for op=trt.ElementWiseOperation.MIN
|
||
maximum for op=trt.ElementWiseOperation.MAX
|
||
pow for op=trt.ElementWiseOperation.POW</p>
|
||
</div></blockquote>
|
||
<p>It is implemented using the IElementWiseLayer from TensorRT.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>left</strong> – Union[Tensor, int, float]
|
||
The first input. If that input is an integer or a float, the
|
||
function creates a constant tensor.</p></li>
|
||
<li><p><strong>right</strong> – Union[Tensor, int, float]
|
||
The second input. If that input is an integer or a float, the
|
||
function creates a constant tensor.</p></li>
|
||
<li><p><strong>op</strong> – trt.ElementWiseOperation
|
||
The binary operation to perform.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.sum">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">sum</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">keepdim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#sum"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.sum" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an operation to compute the sum along a dimension.</p>
|
||
<p>Computes the sum along the dimension ‘dim’ of the input tensor.</p>
|
||
<p>It is implemented using the IReduceLayer from TensorRT.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor.</p></li>
|
||
<li><p><strong>dim</strong> – int
|
||
The dimension along which the mean is computed.</p></li>
|
||
<li><p><strong>keepdim</strong> – bool
|
||
Is the dimension kept in the reduced tensor? When True the
|
||
dimension is kept, it is removed from the shape otherwise.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by this reduction operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.swiglu">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">swiglu</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#swiglu"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.swiglu" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add a SwiGLU (<cite>x * SiLU(gate)</cite>) operation.</p>
|
||
<p>That function takes a tensor, splits it into two halves along the last
|
||
dimension, applies SiLU to the second half and multiply the results. The
|
||
behavior is undefined if the last dimension is not even.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p><strong>input</strong> – Tensor
|
||
The input tensor on which the activation function is applied.</p>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by the activation layer.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.tanh">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">tanh</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input:</span> <span class="pre">~tensorrt_llm.functional.Tensor</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">*</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">act_type:</span> <span class="pre">~tensorrt_bindings.tensorrt.ActivationType</span> <span class="pre">=</span> <span class="pre"><ActivationType.TANH:</span> <span class="pre">2></span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="headerlink" href="#tensorrt_llm.functional.tanh" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an activation function.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor on which the activation function is applied.</p></li>
|
||
<li><p><strong>act_type</strong> – trt.ActivationType
|
||
The type of the activation (RELU, TANH, SIGMOID, …).</p></li>
|
||
</ul>
|
||
</dd>
|
||
</dl>
|
||
<p>The following closures are defined in functional.*:</p>
|
||
<blockquote>
|
||
<div><p>relu for op=trt.ActivationType.RELU
|
||
tanh for op=trt.ActivationType.TANH
|
||
sigmoid for op=trt.ActivationType.SIGMOID</p>
|
||
</div></blockquote>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>The tensor produced by the activation layer.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.topk">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">topk</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">k</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">largest</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">prefer_plugin</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#topk"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.topk" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an topk operation.</p>
|
||
<p>As explained in the ONNX documentation,</p>
|
||
<blockquote>
|
||
<div><p><a class="github reference external" href="https://github.com/onnx/onnx/blob/main/docs/Operators.md#topk">onnx/onnx</a></p>
|
||
</div></blockquote>
|
||
<p>NOTE: One distinction from the ONNX topk op, the output is always sorted
|
||
with TensorRT layer.</p>
|
||
<p>Retrieve the top-K largest elements along a specified axis.
|
||
Given an input tensor of shape [a_1, a_2, …, a_n, r]
|
||
and integer argument k, return two outputs:
|
||
Value tensor of shape [a_1, a_2, …, a_{axis-1}, k, a_{axis+1}, … a_n] which contains the values of the top k elements along the specified axis
|
||
Index tensor of shape [a_1, a_2, …, a_{axis-1}, k, a_{axis+1}, … a_n] which contains the indices of the top k elements (original indices from the input tensor).</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor.</p></li>
|
||
<li><p><strong>k</strong> – int
|
||
A single positive value corresponding to the number of top elements to retrieve</p></li>
|
||
<li><p><strong>dim</strong> – int
|
||
The dimension in which to compute the topk indices.</p></li>
|
||
<li><p><strong>largest</strong> – bool
|
||
Controls whether to return largest or smallest elements</p></li>
|
||
<li><p><strong>prefer_plugin</strong> – bool
|
||
Whether to use the topkLastDim plugin if dim is last dim and k is static.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensors (values, indices) produced by this topk operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.transpose">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">transpose</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim0</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">dim1</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#transpose"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.transpose" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an operation to transpose two dimensions of a tensor.</p>
|
||
<p>That operation produces a tensor in which the dimensions ‘dim0’ and ‘dim1’
|
||
are permuted. The other dimensions, if the rank of the tensor is greater
|
||
than 2, remain untouched.</p>
|
||
<p>That function is a helper built on the ‘functional.permute’ function.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor to transpose.</p></li>
|
||
<li><p><strong>dim0</strong> – int
|
||
The first dimension to transpose.</p></li>
|
||
<li><p><strong>dim1</strong> – int
|
||
The second dimension to transpose.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by the permutation layer.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.unary">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">unary</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">op</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">UnaryOperation</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#unary"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.unary" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an elementwise operation on a single input.</p>
|
||
<p>The following closures are defined in functional.*:</p>
|
||
<blockquote>
|
||
<div><p>round for op=trt.UnaryOperation.ROUND
|
||
sqrt for op=trt.UnaryOperation.SQRT
|
||
exp for op=trt.UnaryOperation.EXP
|
||
sin for op=trt.UnaryOperation.SIN
|
||
cos for op=trt.UnaryOperation.COS
|
||
abs for op=trt.UnaryOperation.ABS
|
||
log for op=trt.UnaryOperation.LOG</p>
|
||
</div></blockquote>
|
||
<p>It is implemented using the IUnaryLayer from TensorRT.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor.</p></li>
|
||
<li><p><strong>op</strong> – trt.UnaryOperation
|
||
The unary operation to perform.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by this elementwise operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.unbind">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">unbind</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">dim</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#unbind"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.unbind" title="Link to this definition">#</a></dt>
|
||
<dd><p>Removes a tensor dimension.</p>
|
||
<p>Returns a tuple of all slices along a given dimension, already without it.</p>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.unsqueeze">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">unsqueeze</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">axis</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#unsqueeze"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.unsqueeze" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an operation to insert a singleton dimension to a tensor.</p>
|
||
<p>That functions creates an operation that insert a singleton dimension
|
||
(dimension of size 1) at position ‘axis’ in the output tensor. It works with
|
||
negative values for the ‘axis’.</p>
|
||
<p>For example, for a tensor ‘input’ of shape [4, 4]:</p>
|
||
<blockquote>
|
||
<div><p>unsqueeze(input, 0) will produce an output of shape [1, 4, 4],
|
||
unsqueeze(input, 1) will produce an output of shape [4, 1, 4],
|
||
unsqueeze(input, -1) will produce an output of shape [4, 4, 1],
|
||
unsqueeze(input, -2) will produce an output of shape [4, 1, 4],</p>
|
||
</div></blockquote>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor to expand with a singleton dimension.</p></li>
|
||
<li><p><strong>axis</strong> – int
|
||
The index of the singleton dimension in the output tensor.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by the layer.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.view">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">view</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">input</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">shape</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">zero_is_placeholder</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#view"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.view" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add an operation to create a view of a tensor.</p>
|
||
<p>That operation adds a tensorrt.IShuffleLayer to the network. If the ‘shape’
|
||
parameter is a Tensor, that view is dynamic. Otherwise, it is a static
|
||
view.</p>
|
||
<p>Note that TensorRT limits the number of inferred dimensions to 1. It means
|
||
that the shape sequence or tensor cannot contain more than one -1. This
|
||
function enforces that constraint and will assert if it is not respected.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>input</strong> – Tensor
|
||
The input tensor to transpose.</p></li>
|
||
<li><p><strong>shape</strong> – Union[Tensor, Sequence[int]]
|
||
The shape of the new tensor.</p></li>
|
||
<li><p><strong>zero_is_placeholder</strong> – bool
|
||
When that parameter is True, the 0s in ‘shape’ are replaced by the
|
||
sizes of the corresponding dimensions from the ‘input’. Otherwise,
|
||
the dimensions corresponding to 0s are shrunk.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by the view/shuffle layer.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
<dl class="py function">
|
||
<dt class="sig sig-object py" id="tensorrt_llm.functional.where">
|
||
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.functional.</span></span><span class="sig-name descname"><span class="pre">where</span></span><span class="sig-paren">(</span>
|
||
|
||
<dl>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">condition</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">bool</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">left</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">float</span></span></em>,</dd>
|
||
<dd><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">float</span></span></em>,</dd>
|
||
</dl>
|
||
|
||
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/functional.html#where"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.functional.where" title="Link to this definition">#</a></dt>
|
||
<dd><p>Add a where (aka select or if-then-else) operation.</p>
|
||
<p>Assuming the three input parameters have the same shape, that function creates
|
||
the operation to compute a tensor of the same shape such that:</p>
|
||
<blockquote>
|
||
<div><dl class="simple">
|
||
<dt>for ii in range(mul(condition.shape)):</dt><dd><p>output[ii] = left[ii] if condition[ii] else right[ii]</p>
|
||
</dd>
|
||
</dl>
|
||
</div></blockquote>
|
||
<p>For each input, that function first creates a constant tensor if the
|
||
condition is boolean or the left/right input is an integer or a float.
|
||
Then, if needed, it expands the smaller tensor to make sure its
|
||
rank is the same as the larger one. Then, it performs the selection.</p>
|
||
<p>It is implemented using the ISelectLayer from TensorRT.</p>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><ul class="simple">
|
||
<li><p><strong>condition</strong> – Union[Tensor, bool]
|
||
The condition. If that input is a boolean, the function
|
||
creates a constant tensor.</p></li>
|
||
<li><p><strong>left</strong> – Union[Tensor, int, float]
|
||
The first input. If that input is an integer or a float, the
|
||
function creates a constant tensor.</p></li>
|
||
<li><p><strong>right</strong> – Union[Tensor, int, float]
|
||
The second input. If that input is an integer or a float, the
|
||
function creates a constant tensor.</p></li>
|
||
</ul>
|
||
</dd>
|
||
<dt class="field-even">Returns<span class="colon">:</span></dt>
|
||
<dd class="field-even"><p>The tensor produced by this where operation.</p>
|
||
</dd>
|
||
</dl>
|
||
</dd></dl>
|
||
|
||
</section>
|
||
|
||
|
||
</article>
|
||
|
||
|
||
|
||
|
||
|
||
<footer class="prev-next-footer d-print-none">
|
||
|
||
<div class="prev-next-area">
|
||
<a class="left-prev"
|
||
href="tensorrt_llm.layers.html"
|
||
title="previous page">
|
||
<i class="fa-solid fa-angle-left"></i>
|
||
<div class="prev-next-info">
|
||
<p class="prev-next-subtitle">previous</p>
|
||
<p class="prev-next-title">Layers</p>
|
||
</div>
|
||
</a>
|
||
<a class="right-next"
|
||
href="tensorrt_llm.models.html"
|
||
title="next page">
|
||
<div class="prev-next-info">
|
||
<p class="prev-next-subtitle">next</p>
|
||
<p class="prev-next-title">Models</p>
|
||
</div>
|
||
<i class="fa-solid fa-angle-right"></i>
|
||
</a>
|
||
</div>
|
||
</footer>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
|
||
|
||
<dialog id="pst-secondary-sidebar-modal"></dialog>
|
||
<div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
|
||
|
||
|
||
<div class="sidebar-secondary-item">
|
||
<div
|
||
id="pst-page-navigation-heading-2"
|
||
class="page-toc tocsection onthispage">
|
||
<i class="fa-solid fa-list"></i> On this page
|
||
</div>
|
||
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
|
||
<ul class="visible nav section-nav flex-column">
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceFusionOp"><code class="docutils literal notranslate"><span class="pre">AllReduceFusionOp</span></code></a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceFusionOp.LAST_PROCESS_FOR_UB"><code class="docutils literal notranslate"><span class="pre">LAST_PROCESS_FOR_UB</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceFusionOp.MOE_ALLREDUCE_RESIDUAL_RMS_NORM"><code class="docutils literal notranslate"><span class="pre">MOE_ALLREDUCE_RESIDUAL_RMS_NORM</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceFusionOp.NONE"><code class="docutils literal notranslate"><span class="pre">NONE</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM"><code class="docutils literal notranslate"><span class="pre">RESIDUAL_RMS_NORM</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_OUT_QUANT_FP8"><code class="docutils literal notranslate"><span class="pre">RESIDUAL_RMS_NORM_OUT_QUANT_FP8</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_OUT_QUANT_NVFP4"><code class="docutils literal notranslate"><span class="pre">RESIDUAL_RMS_NORM_OUT_QUANT_NVFP4</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_QUANT_FP8"><code class="docutils literal notranslate"><span class="pre">RESIDUAL_RMS_NORM_QUANT_FP8</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_QUANT_NVFP4"><code class="docutils literal notranslate"><span class="pre">RESIDUAL_RMS_NORM_QUANT_NVFP4</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_PREPOST_NORM"><code class="docutils literal notranslate"><span class="pre">RESIDUAL_RMS_PREPOST_NORM</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceParams"><code class="docutils literal notranslate"><span class="pre">AllReduceParams</span></code></a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceParams.has_affine"><code class="docutils literal notranslate"><span class="pre">has_affine()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceParams.has_bias"><code class="docutils literal notranslate"><span class="pre">has_bias()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceParams.has_scale"><code class="docutils literal notranslate"><span class="pre">has_scale()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceParams.update_strategy"><code class="docutils literal notranslate"><span class="pre">update_strategy()</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceStrategy"><code class="docutils literal notranslate"><span class="pre">AllReduceStrategy</span></code></a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceStrategy.AUTO"><code class="docutils literal notranslate"><span class="pre">AUTO</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceStrategy.MIN_LATENCY"><code class="docutils literal notranslate"><span class="pre">MIN_LATENCY</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceStrategy.NCCL"><code class="docutils literal notranslate"><span class="pre">NCCL</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceStrategy.ONESHOT"><code class="docutils literal notranslate"><span class="pre">ONESHOT</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceStrategy.TWOSHOT"><code class="docutils literal notranslate"><span class="pre">TWOSHOT</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AllReduceStrategy.UB"><code class="docutils literal notranslate"><span class="pre">UB</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AttentionMaskType"><code class="docutils literal notranslate"><span class="pre">AttentionMaskType</span></code></a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AttentionMaskType.bidirectional"><code class="docutils literal notranslate"><span class="pre">bidirectional</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AttentionMaskType.bidirectionalglm"><code class="docutils literal notranslate"><span class="pre">bidirectionalglm</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AttentionMaskType.blocksparse"><code class="docutils literal notranslate"><span class="pre">blocksparse</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AttentionMaskType.causal"><code class="docutils literal notranslate"><span class="pre">causal</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AttentionMaskType.custom_mask"><code class="docutils literal notranslate"><span class="pre">custom_mask</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AttentionMaskType.padding"><code class="docutils literal notranslate"><span class="pre">padding</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.AttentionMaskType.sliding_window_causal"><code class="docutils literal notranslate"><span class="pre">sliding_window_causal</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Conditional"><code class="docutils literal notranslate"><span class="pre">Conditional</span></code></a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Conditional.add_input"><code class="docutils literal notranslate"><span class="pre">add_input()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Conditional.add_output"><code class="docutils literal notranslate"><span class="pre">add_output()</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.DimRange"><code class="docutils literal notranslate"><span class="pre">DimRange</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.LayerNormPositionType"><code class="docutils literal notranslate"><span class="pre">LayerNormPositionType</span></code></a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.LayerNormPositionType.post_layernorm"><code class="docutils literal notranslate"><span class="pre">post_layernorm</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.LayerNormPositionType.pre_layernorm"><code class="docutils literal notranslate"><span class="pre">pre_layernorm</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.LayerNormType"><code class="docutils literal notranslate"><span class="pre">LayerNormType</span></code></a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.LayerNormType.GroupNorm"><code class="docutils literal notranslate"><span class="pre">GroupNorm</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.LayerNormType.LayerNorm"><code class="docutils literal notranslate"><span class="pre">LayerNorm</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.LayerNormType.RmsNorm"><code class="docutils literal notranslate"><span class="pre">RmsNorm</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.MLPType"><code class="docutils literal notranslate"><span class="pre">MLPType</span></code></a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.MLPType.FusedGatedMLP"><code class="docutils literal notranslate"><span class="pre">FusedGatedMLP</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.MLPType.GatedMLP"><code class="docutils literal notranslate"><span class="pre">GatedMLP</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.MLPType.MLP"><code class="docutils literal notranslate"><span class="pre">MLP</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType"><code class="docutils literal notranslate"><span class="pre">PositionEmbeddingType</span></code></a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.alibi"><code class="docutils literal notranslate"><span class="pre">alibi</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.alibi_with_scale"><code class="docutils literal notranslate"><span class="pre">alibi_with_scale</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.chatglm"><code class="docutils literal notranslate"><span class="pre">chatglm</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.choices"><code class="docutils literal notranslate"><span class="pre">choices()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.deferred"><code class="docutils literal notranslate"><span class="pre">deferred</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.from_string"><code class="docutils literal notranslate"><span class="pre">from_string()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.is_alibi"><code class="docutils literal notranslate"><span class="pre">is_alibi()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.is_deferred"><code class="docutils literal notranslate"><span class="pre">is_deferred()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.is_mrope"><code class="docutils literal notranslate"><span class="pre">is_mrope()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.is_rope"><code class="docutils literal notranslate"><span class="pre">is_rope()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.learned_absolute"><code class="docutils literal notranslate"><span class="pre">learned_absolute</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.long_rope"><code class="docutils literal notranslate"><span class="pre">long_rope</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.mrope"><code class="docutils literal notranslate"><span class="pre">mrope</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.relative"><code class="docutils literal notranslate"><span class="pre">relative</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.rope_gpt_neox"><code class="docutils literal notranslate"><span class="pre">rope_gpt_neox</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.rope_gptj"><code class="docutils literal notranslate"><span class="pre">rope_gptj</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.PositionEmbeddingType.yarn"><code class="docutils literal notranslate"><span class="pre">yarn</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RopeEmbeddingUtils"><code class="docutils literal notranslate"><span class="pre">RopeEmbeddingUtils</span></code></a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RopeEmbeddingUtils.apply_llama3_scaling"><code class="docutils literal notranslate"><span class="pre">apply_llama3_scaling()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb"><code class="docutils literal notranslate"><span class="pre">apply_rotary_pos_emb()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb_chatglm"><code class="docutils literal notranslate"><span class="pre">apply_rotary_pos_emb_chatglm()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb_cogvlm"><code class="docutils literal notranslate"><span class="pre">apply_rotary_pos_emb_cogvlm()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RopeEmbeddingUtils.create_fake_weight"><code class="docutils literal notranslate"><span class="pre">create_fake_weight()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions"><code class="docutils literal notranslate"><span class="pre">create_sinusoidal_positions()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_for_attention_plugin"><code class="docutils literal notranslate"><span class="pre">create_sinusoidal_positions_for_attention_plugin()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_for_cogvlm_attention_plugin"><code class="docutils literal notranslate"><span class="pre">create_sinusoidal_positions_for_cogvlm_attention_plugin()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_long_rope"><code class="docutils literal notranslate"><span class="pre">create_sinusoidal_positions_long_rope()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_yarn"><code class="docutils literal notranslate"><span class="pre">create_sinusoidal_positions_yarn()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RopeEmbeddingUtils.rotate_every_two"><code class="docutils literal notranslate"><span class="pre">rotate_every_two()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RopeEmbeddingUtils.rotate_half"><code class="docutils literal notranslate"><span class="pre">rotate_half()</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RotaryScalingType"><code class="docutils literal notranslate"><span class="pre">RotaryScalingType</span></code></a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RotaryScalingType.dynamic"><code class="docutils literal notranslate"><span class="pre">dynamic</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RotaryScalingType.from_string"><code class="docutils literal notranslate"><span class="pre">from_string()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RotaryScalingType.linear"><code class="docutils literal notranslate"><span class="pre">linear</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RotaryScalingType.llama3"><code class="docutils literal notranslate"><span class="pre">llama3</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RotaryScalingType.longrope"><code class="docutils literal notranslate"><span class="pre">longrope</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RotaryScalingType.mrope"><code class="docutils literal notranslate"><span class="pre">mrope</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RotaryScalingType.none"><code class="docutils literal notranslate"><span class="pre">none</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.RotaryScalingType.yarn"><code class="docutils literal notranslate"><span class="pre">yarn</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.SideStreamIDType"><code class="docutils literal notranslate"><span class="pre">SideStreamIDType</span></code></a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.SideStreamIDType.disable"><code class="docutils literal notranslate"><span class="pre">disable</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.SideStreamIDType.moe"><code class="docutils literal notranslate"><span class="pre">moe</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.SliceInputType"><code class="docutils literal notranslate"><span class="pre">SliceInputType</span></code></a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.SliceInputType.axes"><code class="docutils literal notranslate"><span class="pre">axes</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.SliceInputType.data"><code class="docutils literal notranslate"><span class="pre">data</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.SliceInputType.fill_value"><code class="docutils literal notranslate"><span class="pre">fill_value</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.SliceInputType.size"><code class="docutils literal notranslate"><span class="pre">size</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.SliceInputType.start"><code class="docutils literal notranslate"><span class="pre">start</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.SliceInputType.stride"><code class="docutils literal notranslate"><span class="pre">stride</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor"><code class="docutils literal notranslate"><span class="pre">Tensor</span></code></a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.abs"><code class="docutils literal notranslate"><span class="pre">abs()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.cast"><code class="docutils literal notranslate"><span class="pre">cast()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.dtype"><code class="docutils literal notranslate"><span class="pre">dtype</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.flatten"><code class="docutils literal notranslate"><span class="pre">flatten()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.get_parent"><code class="docutils literal notranslate"><span class="pre">get_parent()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.get_users"><code class="docutils literal notranslate"><span class="pre">get_users()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.is_dynamic"><code class="docutils literal notranslate"><span class="pre">is_dynamic()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.is_trt_wrapper"><code class="docutils literal notranslate"><span class="pre">is_trt_wrapper()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.location"><code class="docutils literal notranslate"><span class="pre">location</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.log"><code class="docutils literal notranslate"><span class="pre">log()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.mark_output"><code class="docutils literal notranslate"><span class="pre">mark_output()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.max"><code class="docutils literal notranslate"><span class="pre">max()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.mean"><code class="docutils literal notranslate"><span class="pre">mean()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.name"><code class="docutils literal notranslate"><span class="pre">name</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.ndim"><code class="docutils literal notranslate"><span class="pre">ndim()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.network"><code class="docutils literal notranslate"><span class="pre">network</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.permute"><code class="docutils literal notranslate"><span class="pre">permute()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.rank"><code class="docutils literal notranslate"><span class="pre">rank()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.repeat"><code class="docutils literal notranslate"><span class="pre">repeat()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.replace_all_uses_with"><code class="docutils literal notranslate"><span class="pre">replace_all_uses_with()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.select"><code class="docutils literal notranslate"><span class="pre">select()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.shape"><code class="docutils literal notranslate"><span class="pre">shape</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.size"><code class="docutils literal notranslate"><span class="pre">size()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.split"><code class="docutils literal notranslate"><span class="pre">split()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.sqrt"><code class="docutils literal notranslate"><span class="pre">sqrt()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.squeeze"><code class="docutils literal notranslate"><span class="pre">squeeze()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.transpose"><code class="docutils literal notranslate"><span class="pre">transpose()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.unbind"><code class="docutils literal notranslate"><span class="pre">unbind()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.unsqueeze"><code class="docutils literal notranslate"><span class="pre">unsqueeze()</span></code></a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.Tensor.view"><code class="docutils literal notranslate"><span class="pre">view()</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.abs"><code class="docutils literal notranslate"><span class="pre">abs()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.activation"><code class="docutils literal notranslate"><span class="pre">activation()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.add"><code class="docutils literal notranslate"><span class="pre">add()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.allgather"><code class="docutils literal notranslate"><span class="pre">allgather()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.allreduce"><code class="docutils literal notranslate"><span class="pre">allreduce()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.arange"><code class="docutils literal notranslate"><span class="pre">arange()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.argmax"><code class="docutils literal notranslate"><span class="pre">argmax()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.assertion"><code class="docutils literal notranslate"><span class="pre">assertion()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.avg_pool2d"><code class="docutils literal notranslate"><span class="pre">avg_pool2d()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.bert_attention"><code class="docutils literal notranslate"><span class="pre">bert_attention()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.broadcast_helper"><code class="docutils literal notranslate"><span class="pre">broadcast_helper()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.cast"><code class="docutils literal notranslate"><span class="pre">cast()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.categorical_sample"><code class="docutils literal notranslate"><span class="pre">categorical_sample()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.chunk"><code class="docutils literal notranslate"><span class="pre">chunk()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.clip"><code class="docutils literal notranslate"><span class="pre">clip()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.concat"><code class="docutils literal notranslate"><span class="pre">concat()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.constant"><code class="docutils literal notranslate"><span class="pre">constant()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.constant_to_tensor_"><code class="docutils literal notranslate"><span class="pre">constant_to_tensor_()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.constants_to_tensors_"><code class="docutils literal notranslate"><span class="pre">constants_to_tensors_()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.conv1d"><code class="docutils literal notranslate"><span class="pre">conv1d()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.conv2d"><code class="docutils literal notranslate"><span class="pre">conv2d()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.conv3d"><code class="docutils literal notranslate"><span class="pre">conv3d()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.conv_transpose2d"><code class="docutils literal notranslate"><span class="pre">conv_transpose2d()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.cos"><code class="docutils literal notranslate"><span class="pre">cos()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.cp_split_plugin"><code class="docutils literal notranslate"><span class="pre">cp_split_plugin()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.create_allreduce_plugin"><code class="docutils literal notranslate"><span class="pre">create_allreduce_plugin()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.cuda_stream_sync"><code class="docutils literal notranslate"><span class="pre">cuda_stream_sync()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.cumsum"><code class="docutils literal notranslate"><span class="pre">cumsum()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.div"><code class="docutils literal notranslate"><span class="pre">div()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.dora_plugin"><code class="docutils literal notranslate"><span class="pre">dora_plugin()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.einsum"><code class="docutils literal notranslate"><span class="pre">einsum()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.elementwise_binary"><code class="docutils literal notranslate"><span class="pre">elementwise_binary()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.embedding"><code class="docutils literal notranslate"><span class="pre">embedding()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.eq"><code class="docutils literal notranslate"><span class="pre">eq()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.exp"><code class="docutils literal notranslate"><span class="pre">exp()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.expand"><code class="docutils literal notranslate"><span class="pre">expand()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.expand_dims"><code class="docutils literal notranslate"><span class="pre">expand_dims()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.expand_dims_like"><code class="docutils literal notranslate"><span class="pre">expand_dims_like()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.expand_mask"><code class="docutils literal notranslate"><span class="pre">expand_mask()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.flatten"><code class="docutils literal notranslate"><span class="pre">flatten()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.flip"><code class="docutils literal notranslate"><span class="pre">flip()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.floordiv"><code class="docutils literal notranslate"><span class="pre">floordiv()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.gather"><code class="docutils literal notranslate"><span class="pre">gather()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.gather_last_token_logits"><code class="docutils literal notranslate"><span class="pre">gather_last_token_logits()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.gather_nd"><code class="docutils literal notranslate"><span class="pre">gather_nd()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.gegelu"><code class="docutils literal notranslate"><span class="pre">gegelu()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.geglu"><code class="docutils literal notranslate"><span class="pre">geglu()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.gelu"><code class="docutils literal notranslate"><span class="pre">gelu()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.gemm_allreduce"><code class="docutils literal notranslate"><span class="pre">gemm_allreduce()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.gemm_swiglu"><code class="docutils literal notranslate"><span class="pre">gemm_swiglu()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.generate_alibi_biases"><code class="docutils literal notranslate"><span class="pre">generate_alibi_biases()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.generate_alibi_slopes"><code class="docutils literal notranslate"><span class="pre">generate_alibi_slopes()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.generate_logn_scaling"><code class="docutils literal notranslate"><span class="pre">generate_logn_scaling()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.gpt_attention"><code class="docutils literal notranslate"><span class="pre">gpt_attention()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.group_norm"><code class="docutils literal notranslate"><span class="pre">group_norm()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.gt"><code class="docutils literal notranslate"><span class="pre">gt()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.identity"><code class="docutils literal notranslate"><span class="pre">identity()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.index_select"><code class="docutils literal notranslate"><span class="pre">index_select()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.int_clip"><code class="docutils literal notranslate"><span class="pre">int_clip()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.interpolate"><code class="docutils literal notranslate"><span class="pre">interpolate()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.is_gated_activation"><code class="docutils literal notranslate"><span class="pre">is_gated_activation()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.layer_norm"><code class="docutils literal notranslate"><span class="pre">layer_norm()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.log"><code class="docutils literal notranslate"><span class="pre">log()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.log_softmax"><code class="docutils literal notranslate"><span class="pre">log_softmax()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.lora_plugin"><code class="docutils literal notranslate"><span class="pre">lora_plugin()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.low_latency_gemm"><code class="docutils literal notranslate"><span class="pre">low_latency_gemm()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.low_latency_gemm_swiglu"><code class="docutils literal notranslate"><span class="pre">low_latency_gemm_swiglu()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.lt"><code class="docutils literal notranslate"><span class="pre">lt()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.mamba_conv1d"><code class="docutils literal notranslate"><span class="pre">mamba_conv1d()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.masked_scatter"><code class="docutils literal notranslate"><span class="pre">masked_scatter()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.masked_select"><code class="docutils literal notranslate"><span class="pre">masked_select()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.matmul"><code class="docutils literal notranslate"><span class="pre">matmul()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.max"><code class="docutils literal notranslate"><span class="pre">max()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.maximum"><code class="docutils literal notranslate"><span class="pre">maximum()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.mean"><code class="docutils literal notranslate"><span class="pre">mean()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.meshgrid2d"><code class="docutils literal notranslate"><span class="pre">meshgrid2d()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.min"><code class="docutils literal notranslate"><span class="pre">min()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.minimum"><code class="docutils literal notranslate"><span class="pre">minimum()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.modulo"><code class="docutils literal notranslate"><span class="pre">modulo()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.mul"><code class="docutils literal notranslate"><span class="pre">mul()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.non_gated_version"><code class="docutils literal notranslate"><span class="pre">non_gated_version()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.nonzero"><code class="docutils literal notranslate"><span class="pre">nonzero()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.not_op"><code class="docutils literal notranslate"><span class="pre">not_op()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.op_and"><code class="docutils literal notranslate"><span class="pre">op_and()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.op_or"><code class="docutils literal notranslate"><span class="pre">op_or()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.op_xor"><code class="docutils literal notranslate"><span class="pre">op_xor()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.outer"><code class="docutils literal notranslate"><span class="pre">outer()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.pad"><code class="docutils literal notranslate"><span class="pre">pad()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.permute"><code class="docutils literal notranslate"><span class="pre">permute()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.pow"><code class="docutils literal notranslate"><span class="pre">pow()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.prod"><code class="docutils literal notranslate"><span class="pre">prod()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.quick_gelu"><code class="docutils literal notranslate"><span class="pre">quick_gelu()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.rand"><code class="docutils literal notranslate"><span class="pre">rand()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.rearrange"><code class="docutils literal notranslate"><span class="pre">rearrange()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.recv"><code class="docutils literal notranslate"><span class="pre">recv()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.reduce"><code class="docutils literal notranslate"><span class="pre">reduce()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.reduce_scatter"><code class="docutils literal notranslate"><span class="pre">reduce_scatter()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.relu"><code class="docutils literal notranslate"><span class="pre">relu()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.repeat"><code class="docutils literal notranslate"><span class="pre">repeat()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.repeat_interleave"><code class="docutils literal notranslate"><span class="pre">repeat_interleave()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.rg_lru"><code class="docutils literal notranslate"><span class="pre">rg_lru()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.rms_norm"><code class="docutils literal notranslate"><span class="pre">rms_norm()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.round"><code class="docutils literal notranslate"><span class="pre">round()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.scatter"><code class="docutils literal notranslate"><span class="pre">scatter()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.scatter_nd"><code class="docutils literal notranslate"><span class="pre">scatter_nd()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.select"><code class="docutils literal notranslate"><span class="pre">select()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.selective_scan"><code class="docutils literal notranslate"><span class="pre">selective_scan()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.send"><code class="docutils literal notranslate"><span class="pre">send()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.shape"><code class="docutils literal notranslate"><span class="pre">shape()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.sigmoid"><code class="docutils literal notranslate"><span class="pre">sigmoid()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.silu"><code class="docutils literal notranslate"><span class="pre">silu()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.sin"><code class="docutils literal notranslate"><span class="pre">sin()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.slice"><code class="docutils literal notranslate"><span class="pre">slice()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.softmax"><code class="docutils literal notranslate"><span class="pre">softmax()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.softplus"><code class="docutils literal notranslate"><span class="pre">softplus()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.split"><code class="docutils literal notranslate"><span class="pre">split()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.sqrt"><code class="docutils literal notranslate"><span class="pre">sqrt()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.squared_relu"><code class="docutils literal notranslate"><span class="pre">squared_relu()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.squeeze"><code class="docutils literal notranslate"><span class="pre">squeeze()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.stack"><code class="docutils literal notranslate"><span class="pre">stack()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.sub"><code class="docutils literal notranslate"><span class="pre">sub()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.sum"><code class="docutils literal notranslate"><span class="pre">sum()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.swiglu"><code class="docutils literal notranslate"><span class="pre">swiglu()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.tanh"><code class="docutils literal notranslate"><span class="pre">tanh()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.topk"><code class="docutils literal notranslate"><span class="pre">topk()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.transpose"><code class="docutils literal notranslate"><span class="pre">transpose()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.unary"><code class="docutils literal notranslate"><span class="pre">unary()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.unbind"><code class="docutils literal notranslate"><span class="pre">unbind()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.unsqueeze"><code class="docutils literal notranslate"><span class="pre">unsqueeze()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.view"><code class="docutils literal notranslate"><span class="pre">view()</span></code></a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.functional.where"><code class="docutils literal notranslate"><span class="pre">where()</span></code></a></li>
|
||
</ul>
|
||
</nav></div>
|
||
|
||
</div></div>
|
||
|
||
|
||
|
||
</div>
|
||
<footer class="bd-footer-content">
|
||
|
||
</footer>
|
||
|
||
</main>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||
<script defer src="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
|
||
<script defer src="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
|
||
|
||
<footer class="bd-footer">
|
||
<div class="bd-footer__inner bd-page-width">
|
||
|
||
<div class="footer-items__start">
|
||
|
||
<div class="footer-item">
|
||
<a class="footer-brand logo" href="https://www.nvidia.com">
|
||
<img src="../_static/nvidia-logo-horiz-rgb-1c-blk-for-screen.svg" class="logo__image only-light" alt="NVIDIA"/>
|
||
<img src="../_static/nvidia-logo-horiz-rgb-1c-wht-for-screen.svg" class="logo__image only-dark" alt="NVIDIA"/>
|
||
</a></div>
|
||
|
||
<div class="footer-item">
|
||
|
||
<div class="footer-links">
|
||
|
||
|
||
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-policy/">Privacy Policy</a>
|
||
|
|
||
|
||
|
||
|
||
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
||
|
|
||
|
||
|
||
|
||
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
|
||
|
|
||
|
||
|
||
|
||
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/terms-of-service/">Terms of Service</a>
|
||
|
|
||
|
||
|
||
|
||
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/accessibility/">Accessibility</a>
|
||
|
|
||
|
||
|
||
|
||
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/company-policies/">Corporate Policies</a>
|
||
|
|
||
|
||
|
||
|
||
<a class="external" href="https://www.nvidia.com/en-us/product-security/">Product Security</a>
|
||
|
|
||
|
||
|
||
|
||
<a class="external" href="https://www.nvidia.com/en-us/contact/">Contact</a>
|
||
|
||
|
||
|
||
</div>
|
||
</div>
|
||
|
||
<div class="footer-item">
|
||
|
||
|
||
|
||
|
||
<p class="copyright">
|
||
|
||
Copyright © 2025, NVidia.
|
||
<br/>
|
||
|
||
</p>
|
||
</div>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
</div>
|
||
|
||
</footer>
|
||
</body>
|
||
</html> |