<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../">
<head>
  <meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />

  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Models — tensorrt_llm documentation</title>
  <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=80d5e7a1" />
  <link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=19f00094" />

  <!--[if lt IE 9]>
    <script src="../_static/js/html5shiv.min.js"></script>
  <![endif]-->

  <script src="../_static/jquery.js?v=5d32c60e"></script>
  <script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
  <script src="../_static/documentation_options.js?v=5929fcd5"></script>
  <script src="../_static/doctools.js?v=888ff710"></script>
  <script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
  <script src="../_static/js/theme.js"></script>
  <link rel="index" title="Index" href="../genindex.html" />
  <link rel="search" title="Search" href="../search.html" />
  <link rel="next" title="Plugin" href="tensorrt_llm.plugin.html" />
  <link rel="prev" title="Functionals" href="tensorrt_llm.functional.html" />
</head>
<body class="wy-body-for-nav">
  <div class="wy-grid-for-nav">
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >
          <a href="../index.html" class="icon icon-home">
            tensorrt_llm
          </a>
          <div role="search">
            <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
              <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
              <input type="hidden" name="check_keywords" value="yes" />
              <input type="hidden" name="area" value="default" />
            </form>
          </div>
        </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
          <p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
          <ul>
            <li class="toctree-l1"><a class="reference internal" href="../architecture.html">TensorRT-LLM Architecture</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../gpt_runtime.html">C++ GPT Runtime</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../batch_manager.html">The Batch Manager in TensorRT-LLM</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../gpt_attention.html">Multi-head, Multi-query and Group-query Attention</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../precision.html">Numerical Precision</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../installation.html">TensorRT-LLM Installation</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../performance.html">Performance of TensorRT-LLM</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../2023-05-19-how-to-debug.html">How to debug</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../2023-05-17-how-to-add-a-new-model.html">How to add a new model</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../graph-rewriting.html">Graph Rewriting Module</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../memory.html">Memory Usage of TensorRT-LLM</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../new_workflow.html">New Workflow</a></li>
          </ul>
          <p class="caption" role="heading"><span class="caption-text">Python API</span></p>
          <ul class="current">
            <li class="toctree-l1"><a class="reference internal" href="tensorrt_llm.layers.html">Layers</a></li>
            <li class="toctree-l1"><a class="reference internal" href="tensorrt_llm.functional.html">Functionals</a></li>
            <li class="toctree-l1 current"><a class="current reference internal" href="#">Models</a><ul>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.BaichuanForCausalLM"><code class="docutils literal notranslate"><span class="pre">BaichuanForCausalLM</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.BaichuanForCausalLM.forward"><code class="docutils literal notranslate"><span class="pre">BaichuanForCausalLM.forward()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.BaichuanForCausalLM.prepare_inputs"><code class="docutils literal notranslate"><span class="pre">BaichuanForCausalLM.prepare_inputs()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.BertForQuestionAnswering"><code class="docutils literal notranslate"><span class="pre">BertForQuestionAnswering</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.BertForQuestionAnswering.forward"><code class="docutils literal notranslate"><span class="pre">BertForQuestionAnswering.forward()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.BertModel"><code class="docutils literal notranslate"><span class="pre">BertModel</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.BertModel.forward"><code class="docutils literal notranslate"><span class="pre">BertModel.forward()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.BloomForCausalLM"><code class="docutils literal notranslate"><span class="pre">BloomForCausalLM</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.BloomForCausalLM.check_config"><code class="docutils literal notranslate"><span class="pre">BloomForCausalLM.check_config()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.BloomModel"><code class="docutils literal notranslate"><span class="pre">BloomModel</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.BloomModel.forward"><code class="docutils literal notranslate"><span class="pre">BloomModel.forward()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.ChatGLMHeadModel"><code class="docutils literal notranslate"><span class="pre">ChatGLMHeadModel</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.ChatGLMHeadModel.forward"><code class="docutils literal notranslate"><span class="pre">ChatGLMHeadModel.forward()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.ChatGLMHeadModel.prepare_inputs"><code class="docutils literal notranslate"><span class="pre">ChatGLMHeadModel.prepare_inputs()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.ChatGLMModel"><code class="docutils literal notranslate"><span class="pre">ChatGLMModel</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.ChatGLMModel.forward"><code class="docutils literal notranslate"><span class="pre">ChatGLMModel.forward()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.DecoderModel"><code class="docutils literal notranslate"><span class="pre">DecoderModel</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.DecoderModel.forward"><code class="docutils literal notranslate"><span class="pre">DecoderModel.forward()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.DecoderModel.prepare_inputs"><code class="docutils literal notranslate"><span class="pre">DecoderModel.prepare_inputs()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.EncoderModel"><code class="docutils literal notranslate"><span class="pre">EncoderModel</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.EncoderModel.forward"><code class="docutils literal notranslate"><span class="pre">EncoderModel.forward()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.EncoderModel.prepare_inputs"><code class="docutils literal notranslate"><span class="pre">EncoderModel.prepare_inputs()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.FalconForCausalLM"><code class="docutils literal notranslate"><span class="pre">FalconForCausalLM</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.FalconForCausalLM.forward"><code class="docutils literal notranslate"><span class="pre">FalconForCausalLM.forward()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.FalconForCausalLM.prepare_inputs"><code class="docutils literal notranslate"><span class="pre">FalconForCausalLM.prepare_inputs()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.FalconModel"><code class="docutils literal notranslate"><span class="pre">FalconModel</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.FalconModel.forward"><code class="docutils literal notranslate"><span class="pre">FalconModel.forward()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.GPTJForCausalLM"><code class="docutils literal notranslate"><span class="pre">GPTJForCausalLM</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.GPTJForCausalLM.forward"><code class="docutils literal notranslate"><span class="pre">GPTJForCausalLM.forward()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.GPTJForCausalLM.prepare_inputs"><code class="docutils literal notranslate"><span class="pre">GPTJForCausalLM.prepare_inputs()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.GPTJModel"><code class="docutils literal notranslate"><span class="pre">GPTJModel</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.GPTJModel.forward"><code class="docutils literal notranslate"><span class="pre">GPTJModel.forward()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.GPTLMHeadModel"><code class="docutils literal notranslate"><span class="pre">GPTLMHeadModel</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.GPTLMHeadModel.forward"><code class="docutils literal notranslate"><span class="pre">GPTLMHeadModel.forward()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.GPTLMHeadModel.prepare_inputs"><code class="docutils literal notranslate"><span class="pre">GPTLMHeadModel.prepare_inputs()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.GPTModel"><code class="docutils literal notranslate"><span class="pre">GPTModel</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.GPTModel.forward"><code class="docutils literal notranslate"><span class="pre">GPTModel.forward()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.GPTNeoXForCausalLM"><code class="docutils literal notranslate"><span class="pre">GPTNeoXForCausalLM</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.GPTNeoXForCausalLM.forward"><code class="docutils literal notranslate"><span class="pre">GPTNeoXForCausalLM.forward()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.GPTNeoXForCausalLM.prepare_inputs"><code class="docutils literal notranslate"><span class="pre">GPTNeoXForCausalLM.prepare_inputs()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.GPTNeoXModel"><code class="docutils literal notranslate"><span class="pre">GPTNeoXModel</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.GPTNeoXModel.forward"><code class="docutils literal notranslate"><span class="pre">GPTNeoXModel.forward()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.LLaMAForCausalLM"><code class="docutils literal notranslate"><span class="pre">LLaMAForCausalLM</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.LLaMAForCausalLM.forward"><code class="docutils literal notranslate"><span class="pre">LLaMAForCausalLM.forward()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.LLaMAForCausalLM.prepare_inputs"><code class="docutils literal notranslate"><span class="pre">LLaMAForCausalLM.prepare_inputs()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.LLaMAModel"><code class="docutils literal notranslate"><span class="pre">LLaMAModel</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.LLaMAModel.forward"><code class="docutils literal notranslate"><span class="pre">LLaMAModel.forward()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.OPTForCausalLM"><code class="docutils literal notranslate"><span class="pre">OPTForCausalLM</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.OPTForCausalLM.check_config"><code class="docutils literal notranslate"><span class="pre">OPTForCausalLM.check_config()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.OPTModel"><code class="docutils literal notranslate"><span class="pre">OPTModel</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.OPTModel.forward"><code class="docutils literal notranslate"><span class="pre">OPTModel.forward()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.PretrainedConfig"><code class="docutils literal notranslate"><span class="pre">PretrainedConfig</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.PretrainedConfig.from_dict"><code class="docutils literal notranslate"><span class="pre">PretrainedConfig.from_dict()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.PretrainedConfig.from_json_file"><code class="docutils literal notranslate"><span class="pre">PretrainedConfig.from_json_file()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.PretrainedConfig.set_if_not_exist"><code class="docutils literal notranslate"><span class="pre">PretrainedConfig.set_if_not_exist()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.PretrainedConfig.set_rank"><code class="docutils literal notranslate"><span class="pre">PretrainedConfig.set_rank()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.PretrainedConfig.to_dict"><code class="docutils literal notranslate"><span class="pre">PretrainedConfig.to_dict()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.PretrainedModel"><code class="docutils literal notranslate"><span class="pre">PretrainedModel</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.PretrainedModel.check_config"><code class="docutils literal notranslate"><span class="pre">PretrainedModel.check_config()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.PretrainedModel.from_checkpoint"><code class="docutils literal notranslate"><span class="pre">PretrainedModel.from_checkpoint()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.PretrainedModel.from_config"><code class="docutils literal notranslate"><span class="pre">PretrainedModel.from_config()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.PretrainedModel.load"><code class="docutils literal notranslate"><span class="pre">PretrainedModel.load()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.PretrainedModel.prepare_inputs"><code class="docutils literal notranslate"><span class="pre">PretrainedModel.prepare_inputs()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.QWenForCausalLM"><code class="docutils literal notranslate"><span class="pre">QWenForCausalLM</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.QWenForCausalLM.forward"><code class="docutils literal notranslate"><span class="pre">QWenForCausalLM.forward()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.QWenForCausalLM.prepare_inputs"><code class="docutils literal notranslate"><span class="pre">QWenForCausalLM.prepare_inputs()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.WhisperEncoder"><code class="docutils literal notranslate"><span class="pre">WhisperEncoder</span></code></a><ul>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.WhisperEncoder.forward"><code class="docutils literal notranslate"><span class="pre">WhisperEncoder.forward()</span></code></a></li>
                <li class="toctree-l3"><a class="reference internal" href="#tensorrt_llm.models.WhisperEncoder.prepare_inputs"><code class="docutils literal notranslate"><span class="pre">WhisperEncoder.prepare_inputs()</span></code></a></li>
              </ul>
              </li>
              <li class="toctree-l2"><a class="reference internal" href="#tensorrt_llm.models.quantize_model"><code class="docutils literal notranslate"><span class="pre">quantize_model()</span></code></a></li>
            </ul>
            </li>
            <li class="toctree-l1"><a class="reference internal" href="tensorrt_llm.plugin.html">Plugin</a></li>
            <li class="toctree-l1"><a class="reference internal" href="tensorrt_llm.quantization.html">Quantization</a></li>
            <li class="toctree-l1"><a class="reference internal" href="tensorrt_llm.runtime.html">Runtime</a></li>
          </ul>
          <p class="caption" role="heading"><span class="caption-text">C++ API</span></p>
          <ul>
            <li class="toctree-l1"><a class="reference internal" href="../_cpp_gen/runtime.html">Runtime</a></li>
          </ul>
          <p class="caption" role="heading"><span class="caption-text">Blogs</span></p>
          <ul>
            <li class="toctree-l1"><a class="reference internal" href="../blogs/H100vsA100.html">H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../blogs/H200launch.html">H200 achieves nearly 12,000 tokens/sec on Llama2-13B with TensorRT-LLM</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../blogs/Falcon180B-H200.html">Falcon-180B on a single H200 GPU with INT4 AWQ, and 6.7x faster Llama-70B over A100</a></li>
          </ul>
        </div>
      </div>
    </nav>
    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
      <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
      <a href="../index.html">tensorrt_llm</a>
    </nav>

    <div class="wy-nav-content">
      <div class="rst-content">
        <div role="navigation" aria-label="Page navigation">
          <ul class="wy-breadcrumbs">
            <li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
            <li class="breadcrumb-item active">Models</li>
            <li class="wy-breadcrumbs-aside">
              <a href="../_sources/python-api/tensorrt_llm.models.rst.txt" rel="nofollow"> View page source</a>
            </li>
          </ul>
          <hr/>
        </div>
        <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
          <div itemprop="articleBody">
<section id="module-tensorrt_llm">
<span id="models"></span><h1>Models<a class="headerlink" href="#module-tensorrt_llm" title="Link to this heading"></a></h1>
<dl class="py class" id="module-tensorrt_llm.models">
<dt class="sig sig-object py" id="tensorrt_llm.models.BaichuanForCausalLM">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">BaichuanForCausalLM</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_kv_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_embedding_type</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">logits_dtype='float32'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mlp_hidden_size=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">quant_mode=QuantMode.None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/baichuan/model.html#BaichuanForCausalLM"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.BaichuanForCausalLM" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">BaichuanModel</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">GenerationMixin</span></code></p>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.BaichuanForCausalLM.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">last_token_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_mask</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/baichuan/model.html#BaichuanForCausalLM.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.BaichuanForCausalLM.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.BaichuanForCausalLM.prepare_inputs">
<span class="sig-name descname"><span class="pre">prepare_inputs</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">max_batch_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_input_len</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_new_tokens</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_beam_width</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_num_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/baichuan/model.html#BaichuanForCausalLM.prepare_inputs"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.BaichuanForCausalLM.prepare_inputs" title="Link to this definition"></a></dt>
<dd><p>@brief: Prepare input Tensors for the model. The given sizes are used to determine the ranges of the dimensions when using TRT dynamic shapes.</p>
<p>@return: a list that contains values which can be fed into self.forward()</p>
</dd></dl>
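<p>A minimal build-time sketch (an editorial illustration, not part of the generated reference; the <code class="docutils literal notranslate"><span class="pre">model</span></code> object and all sizes below are hypothetical): the list returned by <code class="docutils literal notranslate"><span class="pre">prepare_inputs()</span></code> lines up with the parameters of <code class="docutils literal notranslate"><span class="pre">forward()</span></code>, so it is typically unpacked directly when tracing the TRT network.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre># Hedged sketch, assuming `model` is an already-constructed BaichuanForCausalLM
# and that we are inside a tensorrt_llm network-building context.
# The sizes only bound the TRT dynamic-shape ranges; they are example values.
inputs = model.prepare_inputs(max_batch_size=8,
                              max_input_len=1024,
                              max_new_tokens=256,
                              use_cache=True,
                              max_beam_width=1)
# Feed the prepared input tensors back into forward() to trace the graph.
outputs = model.forward(*inputs)
</pre></div></div>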
</dd></dl>


<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.BertForQuestionAnswering">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">BertForQuestionAnswering</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">type_vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_labels=2</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype=None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/bert/model.html#BertForQuestionAnswering"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.BertForQuestionAnswering" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code></p>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.BertForQuestionAnswering.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">input_lengths</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">token_type_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_states</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/bert/model.html#BertForQuestionAnswering.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.BertForQuestionAnswering.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>

</dd></dl>


<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.BertModel">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">BertModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">type_vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype=None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/bert/model.html#BertModel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.BertModel" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code></p>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.BertModel.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">input_lengths</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">token_type_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_states</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/bert/model.html#BertModel.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.BertModel.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>

</dd></dl>


<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.BloomForCausalLM">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">BloomForCausalLM</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/bloom/model.html#BloomForCausalLM"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.BloomForCausalLM" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">DecoderModelForCausalLM</span></code></p>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.BloomForCausalLM.check_config">
<span class="sig-name descname"><span class="pre">check_config</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/bloom/model.html#BloomForCausalLM.check_config"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.BloomForCausalLM.check_config" title="Link to this definition"></a></dt>
<dd></dd></dl>

</dd></dl>


<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.BloomModel">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">BloomModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">config</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/bloom/model.html#BloomModel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.BloomModel" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code></p>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.BloomModel.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_mask</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_embedding_table</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_tasks</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_vocab_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/bloom/model.html#BloomModel.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.BloomModel.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>

</dd></dl>


<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.ChatGLMHeadModel">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">ChatGLMHeadModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">apply_query_key_layer_scaling</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">apply_residual_connection_post_layernorm</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">enable_debug_output</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ffn_hidden_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> 
</span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">linear_bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">logits_dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Mapping</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_input_len</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_output_len</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_seq_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">model_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span 
class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">norm_epsilon</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_kv_heads</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_layers</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">qkv_bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">quant_mode</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantMode" title="tensorrt_llm.quantization.mode.QuantMode"><span class="pre">QuantMode</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rmsnorm</span></span><span class="p"><span class="pre">:</span></span><span class="w"> 
</span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_embedding_scaling</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tokens_per_block</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/chatglm/model.html#ChatGLMHeadModel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.ChatGLMHeadModel" title="Link to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#tensorrt_llm.models.ChatGLMModel" title="tensorrt_llm.models.chatglm.model.ChatGLMModel"><code class="xref py py-class docutils literal notranslate"><span class="pre">ChatGLMModel</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">GenerationMixin</span></code></p>

<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.ChatGLMHeadModel.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">last_token_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.layers.html#tensorrt_llm.layers.attention.KeyValueCacheParams" title="tensorrt_llm.layers.attention.KeyValueCacheParams"><span class="pre">KeyValueCacheParams</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams" title="tensorrt_llm.layers.attention.AttentionParams"><span class="pre">AttentionParams</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/chatglm/model.html#ChatGLMHeadModel.forward"><span 
class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.ChatGLMHeadModel.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.ChatGLMHeadModel.prepare_inputs">
<span class="sig-name descname"><span class="pre">prepare_inputs</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">max_batch_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_input_len</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_new_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_beam_width</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/chatglm/model.html#ChatGLMHeadModel.prepare_inputs"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.ChatGLMHeadModel.prepare_inputs" title="Link to this definition"></a></dt>
<dd><p>@brief: Prepare input Tensors for the model. The given sizes are used to determine the
ranges of the dimensions when using TRT dynamic shapes.</p>
<p>@return: a list containing values that can be fed into self.forward()</p>
</dd></dl>
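<p>A minimal usage sketch (not from the library docs): it assumes a constructed <code class="docutils literal notranslate"><span class="pre">model</span></code> instance, and the sizes are illustrative assumptions.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Hedged sketch: argument values below are assumptions, not recommendations.
inputs = model.prepare_inputs(max_batch_size=8,
                              max_input_len=1024,
                              max_new_tokens=256,
                              use_cache=True,
                              max_beam_width=1)
# Per the docstring, the returned list can be fed into self.forward(),
# which traces the network with the corresponding dynamic-shape ranges.
model.forward(*inputs)
</pre></div></div>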
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.ChatGLMModel">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">ChatGLMModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">config</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/chatglm/model.html#ChatGLMModel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.ChatGLMModel" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.ChatGLMModel.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.layers.html#tensorrt_llm.layers.attention.KeyValueCacheParams" title="tensorrt_llm.layers.attention.KeyValueCacheParams"><span class="pre">KeyValueCacheParams</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams" title="tensorrt_llm.layers.attention.AttentionParams"><span class="pre">AttentionParams</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/chatglm/model.html#ChatGLMModel.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.ChatGLMModel.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.DecoderModel">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">DecoderModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ffn_hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoder_num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoder_hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">logits_dtype='float32'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">head_size=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoder_head_size=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_kv_heads=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoder_num_kv_heads=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_position_embedding=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">relative_attention=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_distance=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_buckets=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">type_vocab_size=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_embedding_layernorm=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_embedding_scale=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">q_scaling=1.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_attention_qkvo_bias=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_mlp_bias=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_model_final_layernorm=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">layernorm_eps=1e-05</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">layernorm_position=LayerNormPositionType.pre_layernorm</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">layernorm_type=LayerNormType.LayerNorm</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act='relu'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mlp_type=MLPType.MLP</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rescale_before_lm_head=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_lm_head_bias=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">residual_scaling=1.0</span></span></em>, <em class="sig-param"><span 
class="n"><span class="pre">use_parallel_embedding=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">embedding_sharding_dim=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/enc_dec/model.html#DecoderModel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.DecoderModel" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">GenerationMixin</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.DecoderModel.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">decoder_input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoder_output</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">token_type_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_mask</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">last_token_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_states</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">all_reduce_workspace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/enc_dec/model.html#DecoderModel.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.DecoderModel.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.DecoderModel.prepare_inputs">
<span class="sig-name descname"><span class="pre">prepare_inputs</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">max_batch_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_beam_width</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_decoder_input_len</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_new_tokens</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_encoder_input_len</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/enc_dec/model.html#DecoderModel.prepare_inputs"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.DecoderModel.prepare_inputs" title="Link to this definition"></a></dt>
<dd><p>@brief: Prepare input Tensors for the model. The given sizes are used to determine the
ranges of the dimensions when using TRT dynamic shapes.</p>
<p>@return: a list containing values that can be fed into self.forward()</p>
</dd></dl>
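<p>A minimal sketch for the decoder half of an encoder-decoder model, assuming a constructed <code class="docutils literal notranslate"><span class="pre">decoder</span></code> instance; the sizes are illustrative assumptions.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Hedged sketch: decoder-side bounds plus the encoder input-length bound,
# since the decoder's forward() also consumes encoder_output.
inputs = decoder.prepare_inputs(max_batch_size=8,
                                max_beam_width=1,
                                max_decoder_input_len=1,
                                max_new_tokens=256,
                                max_encoder_input_len=512)
decoder.forward(*inputs)
</pre></div></div>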
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.EncoderModel">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">EncoderModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ffn_hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">head_size=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_kv_heads=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_position_embedding=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">relative_attention=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_distance=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_buckets=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">type_vocab_size=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_embedding_layernorm=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_embedding_scale=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">q_scaling=1.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_attention_qkvo_bias=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_mlp_bias=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_model_final_layernorm=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">layernorm_eps=1e-05</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">layernorm_position=LayerNormPositionType.pre_layernorm</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">layernorm_type=LayerNormType.LayerNorm</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act='relu'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mlp_type=MLPType.MLP</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">residual_scaling=1.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_parallel_embedding=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">embedding_sharding_dim=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/enc_dec/model.html#EncoderModel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.EncoderModel" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">GenerationMixin</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.EncoderModel.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">input_lengths</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">token_type_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_states</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">all_reduce_workspace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_input_length</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/enc_dec/model.html#EncoderModel.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.EncoderModel.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.EncoderModel.prepare_inputs">
<span class="sig-name descname"><span class="pre">prepare_inputs</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">max_batch_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_input_len</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/enc_dec/model.html#EncoderModel.prepare_inputs"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.EncoderModel.prepare_inputs" title="Link to this definition"></a></dt>
<dd><p>@brief: Prepare input Tensors for the model. The given sizes are used to determine the
ranges of the dimensions when using TRT dynamic shapes.</p>
<p>@return: a list containing values that can be fed into self.forward()</p>
</dd></dl>
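<p>A minimal sketch, assuming a constructed <code class="docutils literal notranslate"><span class="pre">encoder</span></code> instance; the sizes are illustrative assumptions.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Hedged sketch: the encoder runs in a single pass, so only batch-size and
# input-length bounds appear in its signature.
inputs = encoder.prepare_inputs(max_batch_size=8, max_input_len=512)
encoder.forward(*inputs)
</pre></div></div>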
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.FalconForCausalLM">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">FalconForCausalLM</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers:</span> <span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads:</span> <span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size:</span> <span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size:</span> <span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings:</span> <span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">'gelu'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype:</span> <span class="pre">str</span> <span class="pre">|</span> <span class="pre">~tensorrt.tensorrt.DataType</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_kv_heads:</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mlp_hidden_size:</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bias:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">quant_mode:</span> <span class="pre">~tensorrt_llm.quantization.mode.QuantMode</span> <span class="pre">=</span> <span class="pre">QuantMode.None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_alibi:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">parallel_attention:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">new_decoder_architecture:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">logits_dtype:</span> <span class="pre">str</span> <span class="pre">|</span> <span class="pre">~tensorrt.tensorrt.DataType</span> <span class="pre">=</span> <span class="pre">'float32'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/falcon/model.html#FalconForCausalLM"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.FalconForCausalLM" title="Link to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#tensorrt_llm.models.FalconModel" title="tensorrt_llm.models.falcon.model.FalconModel"><code class="xref py py-class docutils literal notranslate"><span class="pre">FalconModel</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">GenerationMixin</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.FalconForCausalLM.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">last_token_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_mask</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_states</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">all_reduce_workspace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/falcon/model.html#FalconForCausalLM.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.FalconForCausalLM.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.FalconForCausalLM.prepare_inputs">
<span class="sig-name descname"><span class="pre">prepare_inputs</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">max_batch_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_input_len</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_new_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_beam_width</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_num_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/falcon/model.html#FalconForCausalLM.prepare_inputs"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.FalconForCausalLM.prepare_inputs" title="Link to this definition"></a></dt>
<dd><p>@brief: Prepare input Tensors for the model. The given sizes are used
to determine the ranges of the dimensions when using TRT dynamic shapes.</p>
<p>@return: a list containing values that can be fed into self.forward()</p>
</dd></dl>
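<p>A minimal sketch, assuming a constructed <code class="docutils literal notranslate"><span class="pre">model</span></code> instance; the sizes are illustrative assumptions.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Hedged sketch: use_cache has no default in this signature and must be
# passed explicitly; the optional max_num_tokens is left at its default.
inputs = model.prepare_inputs(max_batch_size=8,
                              max_input_len=1024,
                              max_new_tokens=256,
                              use_cache=True,
                              max_beam_width=1)
model.forward(*inputs)
</pre></div></div>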
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.FalconModel">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">FalconModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers:</span> <span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads:</span> <span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size:</span> <span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size:</span> <span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act:</span> <span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings:</span> <span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype:</span> <span class="pre">str</span> <span class="pre">|</span> <span class="pre">~tensorrt.tensorrt.DataType</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping:</span> <span class="pre">~tensorrt_llm.mapping.Mapping</span> <span class="pre">=</span> <span class="pre"><tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_kv_heads:</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mlp_hidden_size:</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bias:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">quant_mode:</span> <span class="pre">~tensorrt_llm.quantization.mode.QuantMode</span> <span class="pre">=</span> <span class="pre">QuantMode.None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_alibi:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">parallel_attention:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">new_decoder_architecture:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/falcon/model.html#FalconModel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.FalconModel" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.FalconModel.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_mask</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_states</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">all_reduce_workspace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/falcon/model.html#FalconModel.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.FalconModel.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTJForCausalLM">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">GPTJForCausalLM</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_dim</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">logits_dtype='float32'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">quant_mode=QuantMode.None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gptj/model.html#GPTJForCausalLM"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTJForCausalLM" title="Link to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#tensorrt_llm.models.GPTJModel" title="tensorrt_llm.models.gptj.model.GPTJModel"><code class="xref py py-class docutils literal notranslate"><span class="pre">GPTJModel</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">GenerationMixin</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTJForCausalLM.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">last_token_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gptj/model.html#GPTJForCausalLM.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTJForCausalLM.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTJForCausalLM.prepare_inputs">
<span class="sig-name descname"><span class="pre">prepare_inputs</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">max_batch_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_input_len</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_new_tokens</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_beam_width</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_num_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gptj/model.html#GPTJForCausalLM.prepare_inputs"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTJForCausalLM.prepare_inputs" title="Link to this definition"></a></dt>
<dd><p>@brief: Prepare input Tensors for the model. The given sizes are used to determine the
ranges of the dimensions when using TRT dynamic shapes.</p>
<p>@return: a list containing values that can be fed into self.forward()</p>
</dd></dl>
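<p>A minimal sketch, assuming a constructed <code class="docutils literal notranslate"><span class="pre">model</span></code> instance; the sizes are illustrative assumptions.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Hedged sketch: both use_cache and max_beam_width have no defaults in this
# signature and must be supplied.
inputs = model.prepare_inputs(max_batch_size=8,
                              max_input_len=1024,
                              max_new_tokens=256,
                              use_cache=True,
                              max_beam_width=1)
model.forward(*inputs)
</pre></div></div>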
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTJModel">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">GPTJModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_dim</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">quant_mode=QuantMode.None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gptj/model.html#GPTJModel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTJModel" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTJModel.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gptj/model.html#GPTJModel.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTJModel.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTLMHeadModel">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">GPTLMHeadModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">logits_dtype='float32'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">apply_query_key_layer_scaling=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_embedding_type=PositionEmbeddingType.learned_absolute</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_embedding_percentage=1.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_base=10000.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_scaling=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">inter_size=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bias=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">quant_mode=QuantMode.None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_kv_heads=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_prompt_tuning=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_parallel_embedding=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">embedding_sharding_dim=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">moe_config=MoeConfig(num_experts=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">top_k=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tp_mode=<ParallelismMode.TENSOR_PARALLEL:</span> <span class="pre">2></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">normalization_mode=<ExpertScaleNormalizationMode.RENORMALIZE:</span> <span class="pre">1>)</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">share_embedding_table=False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gpt/model.html#GPTLMHeadModel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTLMHeadModel" title="Link to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#tensorrt_llm.models.GPTModel" title="tensorrt_llm.models.gpt.model.GPTModel"><code class="xref py py-class docutils literal notranslate"><span class="pre">GPTModel</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">GenerationMixin</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTLMHeadModel.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">last_token_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_mask</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_embedding_table</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_tasks</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_vocab_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">workspace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">lora_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gpt/model.html#GPTLMHeadModel.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTLMHeadModel.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTLMHeadModel.prepare_inputs">
<span class="sig-name descname"><span class="pre">prepare_inputs</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">max_batch_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_input_len</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_new_tokens</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_beam_width</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_num_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_embedding_table_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">gather_all_token_logits</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_draft_len</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">lora_target_modules</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gpt/model.html#GPTLMHeadModel.prepare_inputs"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTLMHeadModel.prepare_inputs" title="Link to this definition"></a></dt>
<dd><p>@brief: Prepare input Tensors for the model; the given sizes are used to determine the
ranges of the dimensions when using TRT dynamic shapes.</p>
<p>@return: a list of values that can be fed into self.forward()</p>
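<p>A hedged usage sketch: per the docstring, the returned list can be fed back into <code>forward()</code>; the size limits below are illustrative, and the positional unpacking is an assumption, not part of this signature.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre># Illustrative size budget; pick limits that match your deployment.
inputs = model.prepare_inputs(max_batch_size=8,
                              max_input_len=512,
                              max_new_tokens=128,
                              use_cache=True,
                              max_beam_width=1)
# Feed the prepared tensors back into the network definition.
outputs = model(*inputs)  # Module.__call__ dispatching to forward() is assumed
</pre></div></div>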
</dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTModel">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">GPTModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">apply_query_key_layer_scaling=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_embedding_type=PositionEmbeddingType.learned_absolute</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_embedding_percentage=1.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_base=10000.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_scaling=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">inter_size=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bias=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">quant_mode=QuantMode.None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_kv_heads=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_prompt_tuning=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_parallel_embedding=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">embedding_sharding_dim=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">moe_config=MoeConfig(num_experts=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">top_k=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tp_mode=<ParallelismMode.TENSOR_PARALLEL:</span> <span class="pre">2></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">normalization_mode=<ExpertScaleNormalizationMode.RENORMALIZE:</span> <span class="pre">1>)</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gpt/model.html#GPTModel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTModel" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTModel.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_mask</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_embedding_table</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_tasks</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_vocab_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">workspace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">lora_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gpt/model.html#GPTModel.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTModel.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTNeoXForCausalLM">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">GPTNeoXForCausalLM</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_dim</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_embedding_type=PositionEmbeddingType.rope_gpt_neox</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">apply_query_key_layer_scaling=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_parallel_embedding=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">embedding_sharding_dim=0</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gptneox/model.html#GPTNeoXForCausalLM"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTNeoXForCausalLM" title="Link to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#tensorrt_llm.models.GPTNeoXModel" title="tensorrt_llm.models.gptneox.model.GPTNeoXModel"><code class="xref py py-class docutils literal notranslate"><span class="pre">GPTNeoXModel</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">GenerationMixin</span></code></p>
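<p>A minimal construction sketch using the signature above; the sizes are illustrative only, and the relation of <code>rotary_dim</code> to the source checkpoint's rotary percentage is an assumption.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm.models import GPTNeoXForCausalLM

model = GPTNeoXForCausalLM(
    num_layers=16,
    num_heads=16,
    hidden_size=2048,
    vocab_size=50432,
    hidden_act='gelu',
    max_position_embeddings=2048,
    rotary_dim=32,       # head_size * rotary_pct of the source checkpoint (assumed)
    dtype='float16',
)
</pre></div></div>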
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTNeoXForCausalLM.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">last_token_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gptneox/model.html#GPTNeoXForCausalLM.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTNeoXForCausalLM.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTNeoXForCausalLM.prepare_inputs">
<span class="sig-name descname"><span class="pre">prepare_inputs</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">max_batch_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_input_len</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_new_tokens</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_beam_width</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gptneox/model.html#GPTNeoXForCausalLM.prepare_inputs"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTNeoXForCausalLM.prepare_inputs" title="Link to this definition"></a></dt>
<dd><p>@brief: Prepare input Tensors for the model; the given sizes are used to determine the
ranges of the dimensions when using TRT dynamic shapes.</p>
<p>@return: a list of values that can be fed into self.forward()</p>
</dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTNeoXModel">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">GPTNeoXModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_dim</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_embedding_type=PositionEmbeddingType.rope_gpt_neox</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">apply_query_key_layer_scaling=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_parallel_embedding=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">embedding_sharding_dim=0</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gptneox/model.html#GPTNeoXModel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTNeoXModel" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.GPTNeoXModel.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/gptneox/model.html#GPTNeoXModel.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.GPTNeoXModel.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.LLaMAForCausalLM">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">LLaMAForCausalLM</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_kv_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">logits_dtype='float32'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mlp_hidden_size=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_embedding_type=PositionEmbeddingType.rope_gpt_neox</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_base=10000.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_scaling=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">quant_mode=QuantMode.None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_parallel_embedding=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">embedding_sharding_dim=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rms_norm_eps=1e-06</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_fused_mlp=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attn_bias=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mlp_bias=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">moe_config=MoeConfig(num_experts=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">top_k=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tp_mode=<ParallelismMode.TENSOR_PARALLEL:</span> <span class="pre">2></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">normalization_mode=<ExpertScaleNormalizationMode.RENORMALIZE:</span> <span class="pre">1>)</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_prompt_tuning:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/llama/model.html#LLaMAForCausalLM"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.LLaMAForCausalLM" title="Link to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#tensorrt_llm.models.LLaMAModel" title="tensorrt_llm.models.llama.model.LLaMAModel"><code class="xref py py-class docutils literal notranslate"><span class="pre">LLaMAModel</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">GenerationMixin</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.LLaMAForCausalLM.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">last_token_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_mask</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_states</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">all_reduce_workspace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_embedding_table</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_tasks</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_vocab_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span 
class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">lora_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/llama/model.html#LLaMAForCausalLM.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.LLaMAForCausalLM.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.LLaMAForCausalLM.prepare_inputs">
<span class="sig-name descname"><span class="pre">prepare_inputs</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">max_batch_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_input_len</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_new_tokens</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_beam_width</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_num_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_embedding_table_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">gather_all_token_logits</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">lora_target_modules</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/llama/model.html#LLaMAForCausalLM.prepare_inputs"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.LLaMAForCausalLM.prepare_inputs" title="Link to this definition"></a></dt>
<dd><p>@brief: Prepare input Tensors for the model; the given sizes are used to determine the
ranges of the dimensions when using TRT dynamic shapes.</p>
<p>@return: a list of values that can be fed into self.forward()</p>
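<p>Relative to the GPT-NeoX variant above, this signature exposes extra budget knobs directly; a hedged sketch with illustrative sizes (the LoRA module names are assumptions):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>inputs = model.prepare_inputs(
    max_batch_size=4,
    max_input_len=1024,
    max_new_tokens=256,
    use_cache=True,
    max_beam_width=2,
    max_num_tokens=4096,             # optional cap on tokens per forward pass
    prompt_embedding_table_size=0,   # set &gt; 0 only with prompt tuning enabled
    gather_all_token_logits=False,
    lora_target_modules=['attn_q', 'attn_v'],  # assumed module names; None if unused
)
</pre></div></div>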
</dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.LLaMAModel">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">LLaMAModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_kv_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mlp_hidden_size=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_embedding_type=PositionEmbeddingType.rope_gpt_neox</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_base=10000.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_scaling=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">quant_mode=QuantMode.None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_parallel_embedding=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">embedding_sharding_dim=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rms_norm_eps=1e-06</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_fused_mlp=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attn_bias=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mlp_bias=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">moe_config:</span> <span class="pre">~tensorrt_llm.layers.moe.MoeConfig</span> <span class="pre">=</span> <span class="pre">MoeConfig(num_experts=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">top_k=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tp_mode=<ParallelismMode.TENSOR_PARALLEL:</span> <span class="pre">2></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">normalization_mode=<ExpertScaleNormalizationMode.RENORMALIZE:</span> <span class="pre">1>)</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_prompt_tuning:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/llama/model.html#LLaMAModel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.LLaMAModel" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.LLaMAModel.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_mask</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_states</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">all_reduce_workspace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_embedding_table</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_tasks</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_vocab_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span 
class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">lora_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/llama/model.html#LLaMAModel.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.LLaMAModel.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.OPTForCausalLM">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">OPTForCausalLM</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/opt/model.html#OPTForCausalLM"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.OPTForCausalLM" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">DecoderModelForCausalLM</span></code></p>
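<p><code>OPTForCausalLM</code> follows the config-driven model path, so construction typically goes through a <code>PretrainedConfig</code>; a hedged sketch (the file path is illustrative, and inheriting <code>from_config</code> through <code>DecoderModelForCausalLM</code> is an assumption):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from tensorrt_llm.models import OPTForCausalLM, PretrainedConfig

config = PretrainedConfig.from_json_file('/path/to/opt/config.json')  # illustrative path
model = OPTForCausalLM.from_config(config)  # architecture checks via check_config() (assumed)
</pre></div></div>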
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.OPTForCausalLM.check_config">
<span class="sig-name descname"><span class="pre">check_config</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/opt/model.html#OPTForCausalLM.check_config"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.OPTForCausalLM.check_config" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.OPTModel">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">OPTModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">config</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/opt/model.html#OPTModel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.OPTModel" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.OPTModel.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_mask</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_embedding_table</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_tasks</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_vocab_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/opt/model.html#OPTModel.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.OPTModel.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.PretrainedConfig">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">PretrainedConfig</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">architecture</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">logits_dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_hidden_layers</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_attention_heads</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_key_value_heads</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">intermediate_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">norm_epsilon</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_embedding_type</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">world_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tp_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span 
class="pre">pp_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">quant_mode</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantMode" title="tensorrt_llm.quantization.mode.QuantMode"><span class="pre">QuantMode</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">quant_kwargs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_prompt_tuning</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#PretrainedConfig"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.PretrainedConfig" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
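<p>A hedged sketch of the round trip offered by the methods below; the JSON file must supply the required fields from the signature above, the path is illustrative, and the key passed to <code>set_if_not_exist</code> is a hypothetical example:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>config = PretrainedConfig.from_json_file('/path/to/config.json')  # illustrative path
config.set_if_not_exist('use_parallel_embedding', False)  # set an optional key only if absent
config.set_rank(0)            # bind this config to one rank of world_size
as_dict = config.to_dict()    # plain-dict view, e.g. for serialization
</pre></div></div>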
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.PretrainedConfig.from_dict">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">from_dict</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">config</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#PretrainedConfig.from_dict"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.PretrainedConfig.from_dict" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.PretrainedConfig.from_json_file">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">from_json_file</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">config_file</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#PretrainedConfig.from_json_file"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.PretrainedConfig.from_json_file" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.PretrainedConfig.set_if_not_exist">
<span class="sig-name descname"><span class="pre">set_if_not_exist</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">key</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">value</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#PretrainedConfig.set_if_not_exist"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.PretrainedConfig.set_if_not_exist" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.PretrainedConfig.set_rank">
<span class="sig-name descname"><span class="pre">set_rank</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">rank</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#PretrainedConfig.set_rank"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.PretrainedConfig.set_rank" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.PretrainedConfig.to_dict">
<span class="sig-name descname"><span class="pre">to_dict</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#PretrainedConfig.to_dict"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.PretrainedConfig.to_dict" title="Link to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.PretrainedModel">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">PretrainedModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#PretrainedModel"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.PretrainedModel" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">GenerationMixin</span></code></p>
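<p>The class offers two construction paths via the classmethods below; a hedged sketch, in practice invoked on a concrete subclass (the paths and the shape of <code>weights</code> are assumptions):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre># Path 1: rebuild rank 0 of a model from a serialized checkpoint directory.
model = PretrainedModel.from_checkpoint('/path/to/ckpt_dir', rank=0)  # illustrative path

# Path 2: build from a config, then load weights separately.
model = PretrainedModel.from_config(config)
model.check_config()   # architecture-specific validation
model.load(weights)    # weights: mapping of parameter names to tensors (assumed)
</pre></div></div>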
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.PretrainedModel.check_config">
<span class="sig-name descname"><span class="pre">check_config</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#PretrainedModel.check_config"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.PretrainedModel.check_config" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.PretrainedModel.from_checkpoint">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">from_checkpoint</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">ckpt_dir</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rank</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#PretrainedModel.from_checkpoint"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.PretrainedModel.from_checkpoint" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.PretrainedModel.from_config">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">from_config</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">config</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.models.PretrainedConfig" title="tensorrt_llm.models.modeling_utils.PretrainedConfig"><span class="pre">PretrainedConfig</span></a></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#PretrainedModel.from_config"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.PretrainedModel.from_config" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.PretrainedModel.load">
<span class="sig-name descname"><span class="pre">load</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">weights</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#PretrainedModel.load"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.PretrainedModel.load" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.PretrainedModel.prepare_inputs">
<span class="sig-name descname"><span class="pre">prepare_inputs</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">max_batch_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_input_len</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_new_tokens</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_beam_width</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_num_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prompt_embedding_table_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">gather_all_token_logits</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">lora_target_modules</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#PretrainedModel.prepare_inputs"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.PretrainedModel.prepare_inputs" title="Link to this definition"></a></dt>
<dd><p>@brief: Prepare input tensors for the model. The given sizes are used to determine the
ranges of the tensor dimensions when using TRT dynamic shapes.</p>
<p>@return: a list of values that can be fed into self.forward()</p>
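<p>A minimal usage sketch follows; the sizes are illustrative assumptions, and it presumes the
model has already been constructed inside an active TensorRT-LLM network-building context:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre># Hedged sketch: `model` is any PretrainedModel subclass; all sizes below are
# illustrative assumptions, not recommended defaults.
inputs = model.prepare_inputs(
    max_batch_size=8,      # engine will accept batch sizes from 1 to 8
    max_input_len=1024,    # longest prompt the engine is built for
    max_new_tokens=256,    # longest generation the engine is built for
    use_cache=True,        # enable the KV cache
    max_beam_width=1,      # greedy / single-beam decoding
)
# Per the docstring above, the returned list feeds directly into forward().
outputs = model.forward(*inputs)
</pre></div></div>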
</dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.QWenForCausalLM">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">QWenForCausalLM</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num_layers</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_kv_heads</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">seq_length</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_act</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_position_embeddings</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">logits_dtype='float32'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mlp_hidden_size=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">neox_rotary_style=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_base=10000.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rotary_scaling=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mapping=<tensorrt_llm.mapping.Mapping</span> <span class="pre">object></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">quant_mode=QuantMode.None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_parallel_embedding=False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">embedding_sharding_dim=0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">rms_norm_eps=1e-06</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/qwen/model.html#QWenForCausalLM"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.QWenForCausalLM" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">QWenModel</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">GenerationMixin</span></code></p>
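<p>A hedged construction sketch: every keyword mirrors the signature above, while the dimension
values themselves are illustrative assumptions (roughly QWen-7B-like), not official settings:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre># Sketch only: the hyperparameter values below are illustrative assumptions.
from tensorrt_llm.models import QWenForCausalLM

qwen = QWenForCausalLM(
    num_layers=32,
    num_heads=32,
    num_kv_heads=32,
    hidden_size=4096,
    seq_length=2048,
    vocab_size=151936,
    hidden_act='silu',
    max_position_embeddings=8192,
    dtype='float16',
)
</pre></div></div>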
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.QWenForCausalLM.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_ids</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">position_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">last_token_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kv_cache_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attention_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_states</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">all_reduce_workspace</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/qwen/model.html#QWenForCausalLM.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.QWenForCausalLM.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.QWenForCausalLM.prepare_inputs">
<span class="sig-name descname"><span class="pre">prepare_inputs</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">max_batch_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_input_len</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_new_tokens</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cache</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_beam_width</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_num_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/qwen/model.html#QWenForCausalLM.prepare_inputs"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.QWenForCausalLM.prepare_inputs" title="Link to this definition"></a></dt>
<dd><p>@brief: Prepare input tensors for the model. The given sizes are used to determine the
ranges of the tensor dimensions when using TRT dynamic shapes.</p>
<p>@return: a list of values that can be fed into self.forward()</p>
</dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.models.WhisperEncoder">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">WhisperEncoder</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n_mels</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_ctx</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_state</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_head</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_layer</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dtype</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/enc_dec/model.html#WhisperEncoder"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.WhisperEncoder" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.WhisperEncoder.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor" title="tensorrt_llm.functional.Tensor"><span class="pre">Tensor</span></a></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/enc_dec/model.html#WhisperEncoder.forward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.WhisperEncoder.forward" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.models.WhisperEncoder.prepare_inputs">
<span class="sig-name descname"><span class="pre">prepare_inputs</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">max_batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">16</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/enc_dec/model.html#WhisperEncoder.prepare_inputs"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.WhisperEncoder.prepare_inputs" title="Link to this definition"></a></dt>
<dd></dd></dl>
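<p>A hedged sketch of building the encoder; the dimensions are illustrative assumptions
(approximately Whisper-base-sized), and <code class="docutils literal notranslate"><span class="pre">prepare_inputs</span></code>
defines the encoder input tensors for engines serving up to the given batch size:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre># Sketch only: the dimension values are illustrative assumptions.
from tensorrt_llm.models import WhisperEncoder

encoder = WhisperEncoder(n_mels=80, n_ctx=1500, n_state=512,
                         n_head=8, n_layer=6, dtype='float16')
# Declare input tensor ranges for batches of up to 16 feature sequences.
x = encoder.prepare_inputs(max_batch_size=16)
</pre></div></div>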
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="tensorrt_llm.models.quantize_model">
<span class="sig-prename descclassname"><span class="pre">tensorrt_llm.models.</span></span><span class="sig-name descname"><span class="pre">quantize_model</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Module</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">quant_mode</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantMode" title="tensorrt_llm.quantization.mode.QuantMode"><span class="pre">QuantMode</span></a></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Any</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/quantized/quant.html#quantize_model"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.models.quantize_model" title="Link to this definition"></a></dt>
<dd></dd></dl>
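<p>A hedged usage sketch: the <code class="docutils literal notranslate"><span class="pre">QuantMode</span></code>
shown (INT8 weight-only) is one plausible configuration among several, and any extra keyword
arguments are forwarded through <code class="docutils literal notranslate"><span class="pre">**kwargs</span></code>:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre># Sketch only: assumes `model` is an already-constructed tensorrt_llm model.
from tensorrt_llm.models import quantize_model
from tensorrt_llm.quantization import QuantMode

quant_mode = QuantMode.use_weight_only(use_int4_weights=False)  # INT8 weight-only
model = quantize_model(model, quant_mode)
</pre></div></div>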
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="tensorrt_llm.functional.html" class="btn btn-neutral float-left" title="Functionals" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="tensorrt_llm.plugin.html" class="btn btn-neutral float-right" title="Plugin" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>© Copyright 2023, NVIDIA.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html> |