mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-28 22:56:13 +08:00
2307 lines
72 KiB
HTML
2307 lines
72 KiB
HTML
<!DOCTYPE html>
|
|
<html class="writer-html5" lang="en" data-content_root="../">
|
|
<head>
|
|
<meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
|
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
<title>Overview — tensorrt_llm documentation</title>
|
|
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=80d5e7a1" />
|
|
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=19f00094" />
|
|
|
|
|
|
<!--[if lt IE 9]>
|
|
<script src="../_static/js/html5shiv.min.js"></script>
|
|
<![endif]-->
|
|
|
|
<script src="../_static/jquery.js?v=5d32c60e"></script>
|
|
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
<script src="../_static/documentation_options.js?v=5929fcd5"></script>
|
|
<script src="../_static/doctools.js?v=888ff710"></script>
|
|
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
<script src="../_static/js/theme.js"></script>
|
|
<link rel="index" title="Index" href="../genindex.html" />
|
|
<link rel="search" title="Search" href="../search.html" />
|
|
<link rel="next" title="Best Practices for Tuning the Performance of TensorRT-LLM" href="perf-best-practices.html" />
|
|
<link rel="prev" title="Expert Parallelism in TensorRT-LLM" href="../advanced/expert-parallelism.html" />
|
|
</head>
|
|
|
|
<body class="wy-body-for-nav">
|
|
<div class="wy-grid-for-nav">
|
|
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
<div class="wy-side-scroll">
|
|
<div class="wy-side-nav-search" >
|
|
|
|
|
|
|
|
<a href="../index.html" class="icon icon-home">
|
|
tensorrt_llm
|
|
</a>
|
|
<div role="search">
|
|
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
|
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
<input type="hidden" name="check_keywords" value="yes" />
|
|
<input type="hidden" name="area" value="default" />
|
|
</form>
|
|
</div>
|
|
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
<p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../overview.html">Overview</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../quick-start-guide.html">Quick Start Guide</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../key-features.html">Key Features</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../release-notes.html">Release Notes</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Installation</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../installation/linux.html">Installing on Linux</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../installation/build-from-source-linux.html">Building from Source Code on Linux</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../installation/windows.html">Installing on Windows</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../installation/build-from-source-windows.html">Building from Source Code on Windows</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">LLM API Examples</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../llm-api-examples/index.html">LLM Examples Introduction</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../llm-api-examples/customization.html">Common Customizations</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../llm-api-examples/llm_api_examples.html">Examples</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">LLM API</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../llm-api/index.html">API Reference</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Model Definition API</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.layers.html">Layers</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.functional.html">Functionals</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.models.html">Models</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.plugin.html">Plugin</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.quantization.html">Quantization</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.runtime.html">Runtime</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">C++ API</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../_cpp_gen/executor.html">Executor</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../_cpp_gen/runtime.html">Runtime</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Command-Line Reference</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../commands/trtllm-build.html">trtllm-build</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Architecture</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/overview.html">TensorRT-LLM Architecture</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/core-concepts.html">Model Definition</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/core-concepts.html#compilation">Compilation</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/core-concepts.html#runtime">Runtime</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/core-concepts.html#multi-gpu-and-multi-node-support">Multi-GPU and Multi-Node Support</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/checkpoint.html">TensorRT-LLM Checkpoint</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/workflow.html">TensorRT-LLM Build Workflow</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/add-model.html">Adding a Model</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Advanced</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/gpt-attention.html">Multi-Head, Multi-Query, and Group-Query Attention</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/gpt-runtime.html">C++ GPT Runtime</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/graph-rewriting.html">Graph Rewriting Module</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/batch-manager.html">The Batch Manager in TensorRT-LLM</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/inference-request.html">Inference Request</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/inference-request.html#responses">Responses</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/lora.html">Run gpt-2b + LoRA using GptManager / cpp runtime</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/expert-parallelism.html">Expert Parallelism in TensorRT-LLM</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Performance</span></p>
|
|
<ul class="current">
|
|
<li class="toctree-l1 current"><a class="current reference internal" href="#">Overview</a><ul>
|
|
<li class="toctree-l2"><a class="reference internal" href="#known-issues">Known Issues</a><ul>
|
|
<li class="toctree-l3"><a class="reference internal" href="#fused-matmul-gated-silu-llama">Fused Matmul + Gated-SiLU (LLaMA)</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l2"><a class="reference internal" href="#throughput-measurements">Throughput Measurements</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="#reproducing-benchmarked-results">Reproducing Benchmarked Results</a><ul>
|
|
<li class="toctree-l3"><a class="reference internal" href="#commands">Commands</a></li>
|
|
<li class="toctree-l3"><a class="reference internal" href="#variables">Variables</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l2"><a class="reference internal" href="#preparing-a-dataset">Preparing a Dataset</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="#engine-building">Engine Building</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="#running-the-benchmark">Running the Benchmark</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l1"><a class="reference internal" href="perf-best-practices.html">Best Practices for Tuning the Performance of TensorRT-LLM</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="perf-analysis.html">Performance Analysis</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Reference</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../reference/troubleshooting.html">Troubleshooting</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../reference/support-matrix.html">Support Matrix</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../reference/precision.html">Numerical Precision</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../reference/memory.html">Memory Usage of TensorRT-LLM</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Blogs</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../blogs/H100vsA100.html">H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../blogs/H200launch.html">H200 achieves nearly 12,000 tokens/sec on Llama2-13B with TensorRT-LLM</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../blogs/Falcon180B-H200.html">Falcon-180B on a single H200 GPU with INT4 AWQ, and 6.7x faster Llama-70B over A100</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../blogs/quantization-in-TRT-LLM.html">Speed up inference with SOTA quantization techniques in TRT-LLM</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../blogs/XQA-kernel.html">New XQA-kernel provides 2.4x more Llama-70B throughput within the same latency budget</a></li>
|
|
</ul>
|
|
|
|
</div>
|
|
</div>
|
|
</nav>
|
|
|
|
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
<a href="../index.html">tensorrt_llm</a>
|
|
</nav>
|
|
|
|
<div class="wy-nav-content">
|
|
<div class="rst-content">
|
|
<div role="navigation" aria-label="Page navigation">
|
|
<ul class="wy-breadcrumbs">
|
|
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
<li class="breadcrumb-item active">Overview</li>
|
|
<li class="wy-breadcrumbs-aside">
|
|
<a href="../_sources/performance/perf-overview.md.txt" rel="nofollow"> View page source</a>
|
|
</li>
|
|
</ul>
|
|
<hr/>
|
|
</div>
|
|
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
<div itemprop="articleBody">
|
|
|
|
<blockquote id="perf-overview">
|
|
<div><p><strong>Important:</strong>
|
|
As of TensorRT-LLM v0.10, these performance benchmarks have changed methodology to utilize in-flight batching and
|
|
no longer utilize static benchmarking. These numbers are initial measurements and are expected to improve in future
|
|
releases.</p>
|
|
</div></blockquote>
|
|
<section id="overview">
|
|
<h1>Overview<a class="headerlink" href="#overview" title="Link to this heading"></a></h1>
|
|
<p>This document summarizes performance measurements of TensorRT-LLM on H200, GH200, and H100
|
|
(Hopper), L40S (Ada) and A100 (Ampere) GPUs for a few key models.</p>
|
|
<p>The data in the following tables is provided as a reference point to help users
|
|
validate observed performance. It should not be considered as the peak
|
|
performance that can be delivered by TensorRT-LLM.</p>
|
|
<section id="known-issues">
|
|
<h2>Known Issues<a class="headerlink" href="#known-issues" title="Link to this heading"></a></h2>
|
|
<p>The following issues are being addressed to improve the efficiency of TensorRT-LLM.</p>
|
|
<section id="fused-matmul-gated-silu-llama">
|
|
<h3>Fused Matmul + Gated-SiLU (LLaMA)<a class="headerlink" href="#fused-matmul-gated-silu-llama" title="Link to this heading"></a></h3>
|
|
<p>The current implementation combines two Matmul operations into one Matmul followed by
|
|
a separate SwiGLU kernel (when <code class="docutils literal notranslate"><span class="pre">--use_fused_mlp=enable</span></code> is enabled). There is also a more
|
|
efficient implementation that runs a single fused Matmul + SwiGLU kernel for FP8 on Hopper
|
|
(when <code class="docutils literal notranslate"><span class="pre">--use_fused_mlp=enable</span> <span class="pre">--gemm_swiglu_plugin</span> <span class="pre">fp8</span></code> is enabled). The gemm_swiglu_plugin
|
|
will support more data types and GPU architectures in a future release.</p>
|
|
</section>
|
|
</section>
|
|
<section id="throughput-measurements">
|
|
<h2>Throughput Measurements<a class="headerlink" href="#throughput-measurements" title="Link to this heading"></a></h2>
|
|
<p>The table below shows performance data where a local inference client is fed requests at an infinite rate (no delay between messages),
|
|
and shows the throughput of a client-server scenario under maximum load.</p>
|
|
<p>The performance numbers below were collected using the steps described in this document.</p>
|
|
<p><strong>All data in the table below was generated using version 0.13.0 and presents token throughput in tokens/second.</strong></p>
|
|
<table class="docutils align-default">
|
|
<thead>
|
|
<tr class="row-odd"><th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p><strong>GPU</strong></p></td>
|
|
<td><p><strong>H200 141GB HBM3</strong></p></td>
|
|
<td><p><strong>GH200 120GB</strong></p></td>
|
|
<td><p><strong>H100 80GB HBM3</strong></p></td>
|
|
<td><p><strong>H100 80GB HBM3</strong></p></td>
|
|
<td><p><strong>A100-SXM4-80GB</strong></p></td>
|
|
<td><p><strong>L40S</strong></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p><strong>Precision</strong></p></td>
|
|
<td><p><strong>FP8</strong></p></td>
|
|
<td><p><strong>FP8</strong></p></td>
|
|
<td><p><strong>FP8</strong></p></td>
|
|
<td><p><strong>FP16</strong></p></td>
|
|
<td><p><strong>FP16</strong></p></td>
|
|
<td><p><strong>FP8</strong></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p><strong>Model</strong></p></td>
|
|
<td><p><strong>Input/Output Lengths</strong></p></td>
|
|
<td><p><strong>TP</strong></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p>GPTJ 6B</p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>24,533.54</p></td>
|
|
<td><p>22,368.50</p></td>
|
|
<td><p>24,318.61</p></td>
|
|
<td><p>12,936.63</p></td>
|
|
<td><p>5,964.19</p></td>
|
|
<td><p>7,688.44</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>8,375.67</p></td>
|
|
<td><p>6,588.73</p></td>
|
|
<td><p>7,829.91</p></td>
|
|
<td><p>3,931.61</p></td>
|
|
<td><p>2,215.88</p></td>
|
|
<td><p>1,842.82</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>5,048.59</p></td>
|
|
<td><p>3,662.81</p></td>
|
|
<td><p>3,955.28</p></td>
|
|
<td><p>2,041.06</p></td>
|
|
<td><p>1,118.12</p></td>
|
|
<td><p>980.23</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2,770.27</p></td>
|
|
<td><p>2,520.37</p></td>
|
|
<td><p>2,698.08</p></td>
|
|
<td><p>1,479.48</p></td>
|
|
<td><p>650.09</p></td>
|
|
<td><p>746.54</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>1,791.39</p></td>
|
|
<td><p>1,449.23</p></td>
|
|
<td><p>1,623.17</p></td>
|
|
<td><p>818.80</p></td>
|
|
<td><p>436.85</p></td>
|
|
<td><p>413.33</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>6,770.60</p></td>
|
|
<td><p>5,565.62</p></td>
|
|
<td><p>6,149.65</p></td>
|
|
<td><p>3,030.03</p></td>
|
|
<td><p>1,673.05</p></td>
|
|
<td><p>1,538.45</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>6,465.73</p></td>
|
|
<td><p>5,580.37</p></td>
|
|
<td><p>6,078.80</p></td>
|
|
<td><p>2,797.48</p></td>
|
|
<td><p>1,673.45</p></td>
|
|
<td><p>1,531.57</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,637.42</p></td>
|
|
<td><p>2,998.01</p></td>
|
|
<td><p>3,060.80</p></td>
|
|
<td><p>1,285.08</p></td>
|
|
<td><p>845.83</p></td>
|
|
<td><p>753.55</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p>LLaMA v3.1 8B</p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>28,125.59</p></td>
|
|
<td><p>26,045.60</p></td>
|
|
<td><p>27,147.22</p></td>
|
|
<td><p>15,647.83</p></td>
|
|
<td><p>6,687.04</p></td>
|
|
<td><p>8,548.90</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>22,989.20</p></td>
|
|
<td><p>16,497.79</p></td>
|
|
<td><p>19,221.02</p></td>
|
|
<td><p>8,882.95</p></td>
|
|
<td><p>4,918.53</p></td>
|
|
<td><p>4,988.61</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>16,077.62</p></td>
|
|
<td><p>9,637.91</p></td>
|
|
<td><p>11,856.11</p></td>
|
|
<td><p>5,462.96</p></td>
|
|
<td><p>3,054.46</p></td>
|
|
<td><p>2,768.91</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,625.83</p></td>
|
|
<td><p>3,357.60</p></td>
|
|
<td><p>3,497.30</p></td>
|
|
<td><p>1,859.37</p></td>
|
|
<td><p>796.17</p></td>
|
|
<td><p>1,000.90</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,823.76</p></td>
|
|
<td><p>3,217.40</p></td>
|
|
<td><p>3,276.69</p></td>
|
|
<td><p>1,687.74</p></td>
|
|
<td><p>788.66</p></td>
|
|
<td><p>872.14</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>19,382.37</p></td>
|
|
<td><p>15,128.77</p></td>
|
|
<td><p>13,996.05</p></td>
|
|
<td><p>6,834.76</p></td>
|
|
<td><p>3,929.83</p></td>
|
|
<td><p>3,911.14</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>16,435.21</p></td>
|
|
<td><p>12,355.41</p></td>
|
|
<td><p>13,411.43</p></td>
|
|
<td><p>7,160.92</p></td>
|
|
<td><p>3,592.16</p></td>
|
|
<td><p>3,648.21</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>11,072.97</p></td>
|
|
<td><p>7,850.75</p></td>
|
|
<td><p>8,851.23</p></td>
|
|
<td><p>4,152.21</p></td>
|
|
<td><p>2,269.78</p></td>
|
|
<td><p>2,055.78</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>20000/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>1,634.98</p></td>
|
|
<td><p>1,200.89</p></td>
|
|
<td><p>1,278.04</p></td>
|
|
<td><p>595.89</p></td>
|
|
<td><p>316.43</p></td>
|
|
<td><p>263.75</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>LLaMA v3 8B</p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>27,940.47</p></td>
|
|
<td><p>26,117.13</p></td>
|
|
<td><p>27,156.81</p></td>
|
|
<td><p>15,489.11</p></td>
|
|
<td><p>6,656.98</p></td>
|
|
<td><p>8,734.57</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>23,228.98</p></td>
|
|
<td><p>16,417.04</p></td>
|
|
<td><p>19,209.17</p></td>
|
|
<td><p>8,901.43</p></td>
|
|
<td><p>4,967.37</p></td>
|
|
<td><p>5,004.93</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>15,980.94</p></td>
|
|
<td><p>9,351.95</p></td>
|
|
<td><p>11,889.67</p></td>
|
|
<td><p>5,455.91</p></td>
|
|
<td><p>3,053.27</p></td>
|
|
<td><p>2,768.15</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,631.45</p></td>
|
|
<td><p>3,339.90</p></td>
|
|
<td><p>3,476.37</p></td>
|
|
<td><p>1,918.56</p></td>
|
|
<td><p>796.28</p></td>
|
|
<td><p>1,050.68</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,836.98</p></td>
|
|
<td><p>3,186.22</p></td>
|
|
<td><p>3,279.24</p></td>
|
|
<td><p>1,668.42</p></td>
|
|
<td><p>792.95</p></td>
|
|
<td><p>860.31</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>19,725.45</p></td>
|
|
<td><p>15,241.74</p></td>
|
|
<td><p>14,218.30</p></td>
|
|
<td><p>6,816.62</p></td>
|
|
<td><p>3,899.64</p></td>
|
|
<td><p>3,990.73</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>16,201.60</p></td>
|
|
<td><p>12,049.81</p></td>
|
|
<td><p>13,371.60</p></td>
|
|
<td><p>7,041.47</p></td>
|
|
<td><p>3,617.10</p></td>
|
|
<td><p>3,679.10</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>11,097.69</p></td>
|
|
<td><p>7,255.55</p></td>
|
|
<td><p>8,852.87</p></td>
|
|
<td><p>4,251.45</p></td>
|
|
<td><p>2,269.68</p></td>
|
|
<td><p>2,048.94</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>LLaMA v2 7B</p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>19,549.13</p></td>
|
|
<td><p>17,823.45</p></td>
|
|
<td><p>19,298.99</p></td>
|
|
<td><p>11,436.31</p></td>
|
|
<td><p>5,238.68</p></td>
|
|
<td><p>6,396.62</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>7,675.14</p></td>
|
|
<td><p>5,438.53</p></td>
|
|
<td><p>6,607.33</p></td>
|
|
<td><p>2,985.61</p></td>
|
|
<td><p>1,807.39</p></td>
|
|
<td><p>1,566.03</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>4,397.83</p></td>
|
|
<td><p>3,310.09</p></td>
|
|
<td><p>3,628.46</p></td>
|
|
<td><p>1,575.35</p></td>
|
|
<td><p>957.24</p></td>
|
|
<td><p>821.83</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2,392.31</p></td>
|
|
<td><p>2,064.18</p></td>
|
|
<td><p>2,304.02</p></td>
|
|
<td><p>1,157.55</p></td>
|
|
<td><p>560.35</p></td>
|
|
<td><p>619.83</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>1,570.37</p></td>
|
|
<td><p>1,250.11</p></td>
|
|
<td><p>1,419.09</p></td>
|
|
<td><p>624.75</p></td>
|
|
<td><p>366.39</p></td>
|
|
<td><p>347.03</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>6,044.15</p></td>
|
|
<td><p>4,717.51</p></td>
|
|
<td><p>5,188.69</p></td>
|
|
<td><p>2,382.75</p></td>
|
|
<td><p>1,408.58</p></td>
|
|
<td><p>1,231.78</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>5,896.10</p></td>
|
|
<td><p>4,825.24</p></td>
|
|
<td><p>5,208.97</p></td>
|
|
<td><p>2,462.65</p></td>
|
|
<td><p>1,431.92</p></td>
|
|
<td><p>1,277.79</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,193.42</p></td>
|
|
<td><p>2,693.21</p></td>
|
|
<td><p>2,792.53</p></td>
|
|
<td><p>1,263.11</p></td>
|
|
<td><p>734.38</p></td>
|
|
<td><p>641.47</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>Mistral 7B</p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>30,152.19</p></td>
|
|
<td><p>27,738.08</p></td>
|
|
<td><p>29,672.75</p></td>
|
|
<td><p>16,711.12</p></td>
|
|
<td><p>6,863.59</p></td>
|
|
<td><p>9,676.88</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>24,742.09</p></td>
|
|
<td><p>17,528.14</p></td>
|
|
<td><p>20,318.60</p></td>
|
|
<td><p>9,774.11</p></td>
|
|
<td><p>5,321.44</p></td>
|
|
<td><p>5,437.25</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>16,905.49</p></td>
|
|
<td><p>10,671.38</p></td>
|
|
<td><p>12,715.46</p></td>
|
|
<td><p>5,740.41</p></td>
|
|
<td><p>3,257.23</p></td>
|
|
<td><p>2,941.08</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,676.37</p></td>
|
|
<td><p>3,369.77</p></td>
|
|
<td><p>3,502.83</p></td>
|
|
<td><p>1,893.42</p></td>
|
|
<td><p>796.00</p></td>
|
|
<td><p>996.65</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,890.07</p></td>
|
|
<td><p>3,401.45</p></td>
|
|
<td><p>3,358.65</p></td>
|
|
<td><p>1,740.69</p></td>
|
|
<td><p>807.07</p></td>
|
|
<td><p>904.45</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>20,788.70</p></td>
|
|
<td><p>15,035.59</p></td>
|
|
<td><p>15,962.94</p></td>
|
|
<td><p>7,494.80</p></td>
|
|
<td><p>4,168.89</p></td>
|
|
<td><p>4,088.52</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>17,620.46</p></td>
|
|
<td><p>13,362.84</p></td>
|
|
<td><p>14,213.48</p></td>
|
|
<td><p>7,281.07</p></td>
|
|
<td><p>3,794.31</p></td>
|
|
<td><p>3,972.63</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>11,747.88</p></td>
|
|
<td><p>8,599.03</p></td>
|
|
<td><p>9,200.19</p></td>
|
|
<td><p>4,349.39</p></td>
|
|
<td><p>2,320.50</p></td>
|
|
<td><p>2,170.16</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>20000/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>1,693.41</p></td>
|
|
<td><p>1,271.85</p></td>
|
|
<td><p>1,299.05</p></td>
|
|
<td><p>609.91</p></td>
|
|
<td><p>324.52</p></td>
|
|
<td><p>276.19</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p>LLaMA v3.1 405B</p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>3,734.50</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>3,039.70</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>3,144.97</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>454.17</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>459.91</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2,967.98</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2,259.32</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2,067.15</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>20000/2000</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>447.67</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>LLaMA v3.1 70B</p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,923.61</p></td>
|
|
<td><p>2,998.99</p></td>
|
|
<td><p>2,168.72</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>5,358.16</p></td>
|
|
<td><p>1,839.02</p></td>
|
|
<td><p>5,215.12</p></td>
|
|
<td><p>3,156.10</p></td>
|
|
<td><p>1,340.20</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>8,969.59</p></td>
|
|
<td><p>8,655.98</p></td>
|
|
<td><p>8,677.59</p></td>
|
|
<td><p>5,845.53</p></td>
|
|
<td><p>2,426.46</p></td>
|
|
<td><p>1,434.63</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>16,449.68</p></td>
|
|
<td><p></p></td>
|
|
<td><p>15,711.60</p></td>
|
|
<td><p>10,643.75</p></td>
|
|
<td><p>4,491.42</p></td>
|
|
<td><p>1,365.36</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,503.59</p></td>
|
|
<td><p>1,343.53</p></td>
|
|
<td><p>344.22</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>7,068.42</p></td>
|
|
<td><p>1,146.08</p></td>
|
|
<td><p>5,654.43</p></td>
|
|
<td><p>801.82</p></td>
|
|
<td><p>498.44</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>12,890.95</p></td>
|
|
<td><p>10,358.10</p></td>
|
|
<td><p>9,377.87</p></td>
|
|
<td><p>4,791.11</p></td>
|
|
<td><p>2,460.91</p></td>
|
|
<td><p>1,748.87</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>19,947.02</p></td>
|
|
<td><p></p></td>
|
|
<td><p>15,168.97</p></td>
|
|
<td><p>6,892.18</p></td>
|
|
<td><p>4,148.33</p></td>
|
|
<td><p>1,890.62</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2,314.83</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>6,227.19</p></td>
|
|
<td><p>896.56</p></td>
|
|
<td><p>3,302.41</p></td>
|
|
<td><p>413.22</p></td>
|
|
<td><p>268.86</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>10,059.64</p></td>
|
|
<td><p>6,628.22</p></td>
|
|
<td><p>6,501.69</p></td>
|
|
<td><p>3,056.98</p></td>
|
|
<td><p>1,660.93</p></td>
|
|
<td><p>1,180.87</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>14,393.28</p></td>
|
|
<td><p></p></td>
|
|
<td><p>9,699.99</p></td>
|
|
<td><p>4,238.15</p></td>
|
|
<td><p>2,705.77</p></td>
|
|
<td><p>1,417.60</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>459.73</p></td>
|
|
<td><p>372.44</p></td>
|
|
<td><p>211.51</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>689.30</p></td>
|
|
<td><p>280.61</p></td>
|
|
<td><p>690.05</p></td>
|
|
<td><p>323.66</p></td>
|
|
<td><p>143.39</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>1,047.96</p></td>
|
|
<td><p>1,015.14</p></td>
|
|
<td><p>1,016.24</p></td>
|
|
<td><p>672.37</p></td>
|
|
<td><p>278.87</p></td>
|
|
<td><p>167.87</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2,061.19</p></td>
|
|
<td><p></p></td>
|
|
<td><p>1,964.49</p></td>
|
|
<td><p>1,273.97</p></td>
|
|
<td><p>539.57</p></td>
|
|
<td><p>163.91</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>534.79</p></td>
|
|
<td><p>283.19</p></td>
|
|
<td><p>112.21</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>943.78</p></td>
|
|
<td><p>337.04</p></td>
|
|
<td><p>897.36</p></td>
|
|
<td><p>224.31</p></td>
|
|
<td><p>115.63</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>1,437.45</p></td>
|
|
<td><p>1,383.61</p></td>
|
|
<td><p>1,329.82</p></td>
|
|
<td><p>851.12</p></td>
|
|
<td><p>361.39</p></td>
|
|
<td><p>235.90</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2,795.95</p></td>
|
|
<td><p></p></td>
|
|
<td><p>2,472.69</p></td>
|
|
<td><p>1,438.10</p></td>
|
|
<td><p>679.27</p></td>
|
|
<td><p>224.33</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2,758.24</p></td>
|
|
<td><p>1,083.48</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>6,063.53</p></td>
|
|
<td><p>851.46</p></td>
|
|
<td><p>4,347.69</p></td>
|
|
<td><p>652.34</p></td>
|
|
<td><p>423.06</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>10,061.89</p></td>
|
|
<td><p>9,090.78</p></td>
|
|
<td><p>8,378.16</p></td>
|
|
<td><p>3,441.34</p></td>
|
|
<td><p>2,072.88</p></td>
|
|
<td><p>1,436.41</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>16,139.49</p></td>
|
|
<td><p></p></td>
|
|
<td><p>10,790.85</p></td>
|
|
<td><p>5,792.17</p></td>
|
|
<td><p>3,115.20</p></td>
|
|
<td><p>1,512.78</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2,539.65</p></td>
|
|
<td><p>728.79</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>4,572.03</p></td>
|
|
<td><p>1,223.92</p></td>
|
|
<td><p>3,880.41</p></td>
|
|
<td><p>737.40</p></td>
|
|
<td><p>451.82</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>7,612.56</p></td>
|
|
<td><p>6,705.02</p></td>
|
|
<td><p>6,553.00</p></td>
|
|
<td><p>3,655.64</p></td>
|
|
<td><p>1,731.86</p></td>
|
|
<td><p>1,113.18</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>12,660.86</p></td>
|
|
<td><p></p></td>
|
|
<td><p>11,121.10</p></td>
|
|
<td><p>5,599.45</p></td>
|
|
<td><p>3,013.95</p></td>
|
|
<td><p>1,120.73</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>1,753.58</p></td>
|
|
<td><p>611.08</p></td>
|
|
<td><p>161.60</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>3,407.26</p></td>
|
|
<td><p>626.26</p></td>
|
|
<td><p>2,432.55</p></td>
|
|
<td><p></p></td>
|
|
<td><p>108.91</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>6,565.77</p></td>
|
|
<td><p>4,864.55</p></td>
|
|
<td><p>4,948.83</p></td>
|
|
<td><p>2,396.06</p></td>
|
|
<td><p>1,220.93</p></td>
|
|
<td><p>855.44</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>9,948.56</p></td>
|
|
<td><p></p></td>
|
|
<td><p>8,527.52</p></td>
|
|
<td><p>3,819.60</p></td>
|
|
<td><p>2,103.68</p></td>
|
|
<td><p>924.89</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>20000/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>262.82</p></td>
|
|
<td><p>88.89</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>598.19</p></td>
|
|
<td><p>177.04</p></td>
|
|
<td><p>414.17</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>1,047.27</p></td>
|
|
<td><p>958.88</p></td>
|
|
<td><p>856.31</p></td>
|
|
<td><p>375.85</p></td>
|
|
<td><p>187.42</p></td>
|
|
<td><p>140.73</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>1,793.52</p></td>
|
|
<td><p></p></td>
|
|
<td><p>1,359.27</p></td>
|
|
<td><p>650.78</p></td>
|
|
<td><p>344.41</p></td>
|
|
<td><p>122.04</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>LLaMA v3 70B</p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,924.02</p></td>
|
|
<td><p>3,161.73</p></td>
|
|
<td><p>2,177.84</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>5,388.22</p></td>
|
|
<td><p>1,551.84</p></td>
|
|
<td><p>5,205.80</p></td>
|
|
<td><p>3,186.61</p></td>
|
|
<td><p>1,321.55</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>8,958.95</p></td>
|
|
<td><p>8,618.55</p></td>
|
|
<td><p>8,678.68</p></td>
|
|
<td><p>5,857.16</p></td>
|
|
<td><p>2,424.68</p></td>
|
|
<td><p>1,432.46</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>16,375.41</p></td>
|
|
<td><p></p></td>
|
|
<td><p>15,703.26</p></td>
|
|
<td><p>10,627.36</p></td>
|
|
<td><p>4,490.19</p></td>
|
|
<td><p>1,333.09</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,519.24</p></td>
|
|
<td><p>1,346.37</p></td>
|
|
<td><p>353.68</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>7,071.54</p></td>
|
|
<td><p>862.54</p></td>
|
|
<td><p>5,878.06</p></td>
|
|
<td><p>802.98</p></td>
|
|
<td><p>512.11</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>12,876.38</p></td>
|
|
<td><p>10,015.23</p></td>
|
|
<td><p>8,929.23</p></td>
|
|
<td><p>4,768.27</p></td>
|
|
<td><p>2,458.73</p></td>
|
|
<td><p>1,737.31</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>20,013.92</p></td>
|
|
<td><p></p></td>
|
|
<td><p>15,171.91</p></td>
|
|
<td><p>6,875.97</p></td>
|
|
<td><p>3,906.35</p></td>
|
|
<td><p>1,892.41</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2,310.85</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>6,199.95</p></td>
|
|
<td><p>602.98</p></td>
|
|
<td><p>3,311.05</p></td>
|
|
<td><p>413.29</p></td>
|
|
<td><p>269.02</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>9,633.49</p></td>
|
|
<td><p>7,370.19</p></td>
|
|
<td><p>6,489.95</p></td>
|
|
<td><p>3,053.89</p></td>
|
|
<td><p>1,677.51</p></td>
|
|
<td><p>1,199.71</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>14,552.09</p></td>
|
|
<td><p></p></td>
|
|
<td><p>9,632.02</p></td>
|
|
<td><p>4,259.39</p></td>
|
|
<td><p>2,697.61</p></td>
|
|
<td><p>1,358.34</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>458.75</p></td>
|
|
<td><p>371.70</p></td>
|
|
<td><p>210.27</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>694.00</p></td>
|
|
<td><p>277.85</p></td>
|
|
<td><p>692.74</p></td>
|
|
<td><p>321.71</p></td>
|
|
<td><p>144.61</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>1,048.84</p></td>
|
|
<td><p>1,016.03</p></td>
|
|
<td><p>1,022.77</p></td>
|
|
<td><p>690.10</p></td>
|
|
<td><p>279.06</p></td>
|
|
<td><p>168.52</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2,072.33</p></td>
|
|
<td><p></p></td>
|
|
<td><p>1,976.76</p></td>
|
|
<td><p>1,273.41</p></td>
|
|
<td><p>542.93</p></td>
|
|
<td><p>158.63</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>533.37</p></td>
|
|
<td><p>303.33</p></td>
|
|
<td><p>112.68</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>936.82</p></td>
|
|
<td><p>379.62</p></td>
|
|
<td><p>899.29</p></td>
|
|
<td><p>224.65</p></td>
|
|
<td><p>115.00</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>1,442.76</p></td>
|
|
<td><p>1,384.62</p></td>
|
|
<td><p>1,326.95</p></td>
|
|
<td><p>853.73</p></td>
|
|
<td><p>361.06</p></td>
|
|
<td><p>235.19</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2,797.36</p></td>
|
|
<td><p></p></td>
|
|
<td><p>2,483.56</p></td>
|
|
<td><p>1,437.15</p></td>
|
|
<td><p>678.70</p></td>
|
|
<td><p>225.15</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2,763.89</p></td>
|
|
<td><p>1,074.62</p></td>
|
|
<td><p>293.47</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>6,054.46</p></td>
|
|
<td><p>1,109.13</p></td>
|
|
<td><p>4,356.55</p></td>
|
|
<td><p>683.11</p></td>
|
|
<td><p>423.82</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>10,103.08</p></td>
|
|
<td><p>7,325.93</p></td>
|
|
<td><p>8,370.32</p></td>
|
|
<td><p>3,436.29</p></td>
|
|
<td><p>2,064.47</p></td>
|
|
<td><p>1,412.78</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>16,857.45</p></td>
|
|
<td><p></p></td>
|
|
<td><p>10,760.65</p></td>
|
|
<td><p>5,665.02</p></td>
|
|
<td><p>3,159.89</p></td>
|
|
<td><p>1,517.76</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2,540.45</p></td>
|
|
<td><p>1,164.45</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>4,590.38</p></td>
|
|
<td><p>1,040.64</p></td>
|
|
<td><p>3,879.25</p></td>
|
|
<td><p>768.53</p></td>
|
|
<td><p>453.73</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>7,606.92</p></td>
|
|
<td><p>6,655.61</p></td>
|
|
<td><p>6,547.23</p></td>
|
|
<td><p>3,655.19</p></td>
|
|
<td><p>1,732.86</p></td>
|
|
<td><p>1,117.53</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>12,660.32</p></td>
|
|
<td><p></p></td>
|
|
<td><p>11,155.47</p></td>
|
|
<td><p>5,617.24</p></td>
|
|
<td><p>2,894.58</p></td>
|
|
<td><p>1,126.50</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>1,746.77</p></td>
|
|
<td><p>610.87</p></td>
|
|
<td><p>162.10</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>3,405.72</p></td>
|
|
<td><p>738.51</p></td>
|
|
<td><p>2,548.70</p></td>
|
|
<td><p></p></td>
|
|
<td><p>108.66</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>6,571.34</p></td>
|
|
<td><p>4,880.28</p></td>
|
|
<td><p>5,060.39</p></td>
|
|
<td><p>2,391.55</p></td>
|
|
<td><p>1,222.11</p></td>
|
|
<td><p>854.65</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>9,923.96</p></td>
|
|
<td><p></p></td>
|
|
<td><p>8,480.48</p></td>
|
|
<td><p>3,826.38</p></td>
|
|
<td><p>2,181.07</p></td>
|
|
<td><p>927.54</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>LLaMA v2 70B</p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,969.25</p></td>
|
|
<td><p>3,502.35</p></td>
|
|
<td><p>3,413.82</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>6,394.64</p></td>
|
|
<td><p>3,252.69</p></td>
|
|
<td><p>6,432.82</p></td>
|
|
<td><p>3,170.28</p></td>
|
|
<td><p>1,336.48</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>11,031.42</p></td>
|
|
<td><p>11,126.95</p></td>
|
|
<td><p>10,865.42</p></td>
|
|
<td><p>6,420.88</p></td>
|
|
<td><p>2,766.00</p></td>
|
|
<td><p>1,487.71</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>17,060.04</p></td>
|
|
<td><p></p></td>
|
|
<td><p>16,384.83</p></td>
|
|
<td><p>11,146.15</p></td>
|
|
<td><p>4,742.74</p></td>
|
|
<td><p>1,404.99</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,742.99</p></td>
|
|
<td><p>1,660.81</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>6,453.25</p></td>
|
|
<td><p>1,335.80</p></td>
|
|
<td><p>5,775.34</p></td>
|
|
<td><p>757.21</p></td>
|
|
<td><p>476.46</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>13,869.67</p></td>
|
|
<td><p>11,098.69</p></td>
|
|
<td><p>9,536.82</p></td>
|
|
<td><p>5,274.27</p></td>
|
|
<td><p>2,686.16</p></td>
|
|
<td><p>1,880.22</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>19,220.48</p></td>
|
|
<td><p></p></td>
|
|
<td><p>17,715.01</p></td>
|
|
<td><p>8,904.94</p></td>
|
|
<td><p>5,520.41</p></td>
|
|
<td><p>2,186.68</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2,459.63</p></td>
|
|
<td><p></p></td>
|
|
<td><p>446.60</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>4,831.03</p></td>
|
|
<td><p>684.68</p></td>
|
|
<td><p>3,354.60</p></td>
|
|
<td><p>385.98</p></td>
|
|
<td><p>235.22</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>8,988.84</p></td>
|
|
<td><p>8,397.13</p></td>
|
|
<td><p>7,619.62</p></td>
|
|
<td><p>3,228.36</p></td>
|
|
<td><p>1,941.07</p></td>
|
|
<td><p>1,318.51</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>15,115.41</p></td>
|
|
<td><p></p></td>
|
|
<td><p>12,506.95</p></td>
|
|
<td><p>5,996.81</p></td>
|
|
<td><p>3,539.36</p></td>
|
|
<td><p>1,782.93</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>458.88</p></td>
|
|
<td><p>400.31</p></td>
|
|
<td><p>328.90</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>745.71</p></td>
|
|
<td><p>457.57</p></td>
|
|
<td><p>742.17</p></td>
|
|
<td><p>308.02</p></td>
|
|
<td><p>138.81</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>1,297.10</p></td>
|
|
<td><p>1,330.90</p></td>
|
|
<td><p>1,270.78</p></td>
|
|
<td><p>755.30</p></td>
|
|
<td><p>321.72</p></td>
|
|
<td><p>171.67</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2,060.53</p></td>
|
|
<td><p></p></td>
|
|
<td><p>2,009.57</p></td>
|
|
<td><p>1,348.71</p></td>
|
|
<td><p>561.71</p></td>
|
|
<td><p>160.37</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>548.46</p></td>
|
|
<td><p>364.00</p></td>
|
|
<td><p>224.17</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>1,020.86</p></td>
|
|
<td><p>335.07</p></td>
|
|
<td><p>885.67</p></td>
|
|
<td><p>212.20</p></td>
|
|
<td><p>112.43</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>1,759.69</p></td>
|
|
<td><p>1,683.26</p></td>
|
|
<td><p>1,590.94</p></td>
|
|
<td><p>837.57</p></td>
|
|
<td><p>386.78</p></td>
|
|
<td><p>231.54</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2,839.69</p></td>
|
|
<td><p></p></td>
|
|
<td><p>2,546.12</p></td>
|
|
<td><p>1,570.91</p></td>
|
|
<td><p>709.66</p></td>
|
|
<td><p>238.59</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3,019.28</p></td>
|
|
<td><p>1,364.66</p></td>
|
|
<td><p>716.54</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>6,402.94</p></td>
|
|
<td><p>1,292.24</p></td>
|
|
<td><p>4,462.98</p></td>
|
|
<td><p>629.21</p></td>
|
|
<td><p>387.61</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>12,429.18</p></td>
|
|
<td><p>8,951.07</p></td>
|
|
<td><p>8,753.09</p></td>
|
|
<td><p>4,012.41</p></td>
|
|
<td><p>2,158.17</p></td>
|
|
<td><p>1,517.53</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>16,789.12</p></td>
|
|
<td><p></p></td>
|
|
<td><p>15,260.29</p></td>
|
|
<td><p>7,384.79</p></td>
|
|
<td><p>4,104.80</p></td>
|
|
<td><p>1,739.28</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2,706.04</p></td>
|
|
<td><p>1,449.83</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>4,693.24</p></td>
|
|
<td><p>960.39</p></td>
|
|
<td><p>3,958.45</p></td>
|
|
<td><p>736.68</p></td>
|
|
<td><p>425.70</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>8,557.11</p></td>
|
|
<td><p>7,278.64</p></td>
|
|
<td><p>6,817.41</p></td>
|
|
<td><p>3,866.05</p></td>
|
|
<td><p>1,876.40</p></td>
|
|
<td><p>1,188.91</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>13,483.04</p></td>
|
|
<td><p></p></td>
|
|
<td><p>11,511.74</p></td>
|
|
<td><p>6,543.96</p></td>
|
|
<td><p>3,285.82</p></td>
|
|
<td><p>1,241.42</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>1,911.20</p></td>
|
|
<td><p>798.50</p></td>
|
|
<td><p>412.37</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>3,408.82</p></td>
|
|
<td><p>767.24</p></td>
|
|
<td><p>2,551.21</p></td>
|
|
<td><p>388.82</p></td>
|
|
<td><p>226.60</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>6,702.46</p></td>
|
|
<td><p>5,354.80</p></td>
|
|
<td><p>5,212.02</p></td>
|
|
<td><p>2,512.22</p></td>
|
|
<td><p>1,316.92</p></td>
|
|
<td><p>891.95</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>10,348.65</p></td>
|
|
<td><p></p></td>
|
|
<td><p>8,016.14</p></td>
|
|
<td><p>4,414.75</p></td>
|
|
<td><p>2,492.09</p></td>
|
|
<td><p>1,083.26</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>Mixtral 8x7B</p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>25,135.25</p></td>
|
|
<td><p>8,512.51</p></td>
|
|
<td><p>24,572.90</p></td>
|
|
<td><p>15,395.59</p></td>
|
|
<td><p>5,927.88</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>42,394.61</p></td>
|
|
<td><p>40,148.01</p></td>
|
|
<td><p>40,309.25</p></td>
|
|
<td><p>27,747.43</p></td>
|
|
<td><p>11,205.51</p></td>
|
|
<td><p>6,784.44</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>54,648.80</p></td>
|
|
<td><p></p></td>
|
|
<td><p>51,683.16</p></td>
|
|
<td><p>40,116.51</p></td>
|
|
<td><p>18,496.66</p></td>
|
|
<td><p>6,437.72</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>29,412.17</p></td>
|
|
<td><p>3,271.02</p></td>
|
|
<td><p>20,938.80</p></td>
|
|
<td><p>7,391.51</p></td>
|
|
<td><p>4,278.79</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>52,603.13</p></td>
|
|
<td><p>43,071.34</p></td>
|
|
<td><p>40,580.94</p></td>
|
|
<td><p>21,332.15</p></td>
|
|
<td><p>10,946.58</p></td>
|
|
<td><p>7,475.05</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>70,427.00</p></td>
|
|
<td><p></p></td>
|
|
<td><p>64,161.64</p></td>
|
|
<td><p>41,101.18</p></td>
|
|
<td><p>21,235.99</p></td>
|
|
<td><p>9,955.21</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>21,312.11</p></td>
|
|
<td><p>2,254.56</p></td>
|
|
<td><p></p></td>
|
|
<td><p>3,896.02</p></td>
|
|
<td><p>2,388.14</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>39,353.01</p></td>
|
|
<td><p>30,065.77</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>7,108.03</p></td>
|
|
<td><p>5,232.44</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>32,992.62</p></td>
|
|
<td><p></p></td>
|
|
<td><p>47,860.65</p></td>
|
|
<td><p>27,261.67</p></td>
|
|
<td><p>15,943.70</p></td>
|
|
<td><p>8,081.21</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>2,946.01</p></td>
|
|
<td><p>921.87</p></td>
|
|
<td><p>2,894.09</p></td>
|
|
<td><p>1,790.49</p></td>
|
|
<td><p>684.71</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>5,237.58</p></td>
|
|
<td><p>5,056.60</p></td>
|
|
<td><p>4,988.14</p></td>
|
|
<td><p>3,354.89</p></td>
|
|
<td><p>1,338.54</p></td>
|
|
<td><p>803.50</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>7,053.32</p></td>
|
|
<td><p></p></td>
|
|
<td><p>6,559.63</p></td>
|
|
<td><p>5,072.46</p></td>
|
|
<td><p>2,244.39</p></td>
|
|
<td><p>753.39</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>3,848.10</p></td>
|
|
<td><p>997.06</p></td>
|
|
<td><p>3,630.24</p></td>
|
|
<td><p>1,656.04</p></td>
|
|
<td><p>739.84</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>6,877.65</p></td>
|
|
<td><p>6,466.39</p></td>
|
|
<td><p>6,237.22</p></td>
|
|
<td><p>3,607.46</p></td>
|
|
<td><p>1,619.49</p></td>
|
|
<td><p>1,048.60</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>9,531.26</p></td>
|
|
<td><p></p></td>
|
|
<td><p>8,709.34</p></td>
|
|
<td><p>6,237.96</p></td>
|
|
<td><p>2,927.13</p></td>
|
|
<td><p>1,109.25</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>23,539.24</p></td>
|
|
<td><p>2,773.86</p></td>
|
|
<td><p>16,886.30</p></td>
|
|
<td><p>5,773.33</p></td>
|
|
<td><p>3,325.73</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>40,035.05</p></td>
|
|
<td><p>33,478.35</p></td>
|
|
<td><p>32,047.73</p></td>
|
|
<td><p>16,897.03</p></td>
|
|
<td><p>8,908.09</p></td>
|
|
<td><p>6,153.32</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>60,572.77</p></td>
|
|
<td><p></p></td>
|
|
<td><p>41,597.80</p></td>
|
|
<td><p>31,392.32</p></td>
|
|
<td><p>16,954.54</p></td>
|
|
<td><p>7,980.34</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>18,644.51</p></td>
|
|
<td><p>4,540.15</p></td>
|
|
<td><p>14,154.95</p></td>
|
|
<td><p>5,826.43</p></td>
|
|
<td><p>3,289.27</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>32,709.62</p></td>
|
|
<td><p>29,046.16</p></td>
|
|
<td><p>25,291.30</p></td>
|
|
<td><p>14,307.91</p></td>
|
|
<td><p>7,461.63</p></td>
|
|
<td><p>4,697.19</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>44,072.88</p></td>
|
|
<td><p></p></td>
|
|
<td><p>40,628.46</p></td>
|
|
<td><p>27,633.48</p></td>
|
|
<td><p>13,741.62</p></td>
|
|
<td><p>5,706.17</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>14,017.70</p></td>
|
|
<td><p>2,870.77</p></td>
|
|
<td><p>10,448.79</p></td>
|
|
<td><p>3,535.21</p></td>
|
|
<td><p>1,954.32</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>25,550.44</p></td>
|
|
<td><p>21,488.32</p></td>
|
|
<td><p>19,977.11</p></td>
|
|
<td><p>9,620.99</p></td>
|
|
<td><p>5,191.30</p></td>
|
|
<td><p>3,593.18</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>24,999.94</p></td>
|
|
<td><p></p></td>
|
|
<td><p>31,678.85</p></td>
|
|
<td><p>19,372.52</p></td>
|
|
<td><p>10,572.07</p></td>
|
|
<td><p>4,860.61</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>20000/2000</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>2,195.84</p></td>
|
|
<td><p>367.81</p></td>
|
|
<td><p>1,583.86</p></td>
|
|
<td><p>626.60</p></td>
|
|
<td><p>320.41</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>4,086.41</p></td>
|
|
<td><p>3,301.28</p></td>
|
|
<td><p>2,982.42</p></td>
|
|
<td><p>1,586.09</p></td>
|
|
<td><p>807.67</p></td>
|
|
<td><p>579.49</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>5,797.57</p></td>
|
|
<td><p></p></td>
|
|
<td><p>5,163.91</p></td>
|
|
<td><p>3,106.98</p></td>
|
|
<td><p>1,653.55</p></td>
|
|
<td><p>821.64</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p><em>TP stands for Tensor Parallelism</em></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</section>
|
|
<section id="reproducing-benchmarked-results">
|
|
<h2>Reproducing Benchmarked Results<a class="headerlink" href="#reproducing-benchmarked-results" title="Link to this heading"></a></h2>
|
|
<blockquote>
|
|
<div><p><strong>Note:</strong> The only models supported in this workflow are those listed in the table above.</p>
|
|
</div></blockquote>
|
|
<p>The following tables list the commands used in the benchmarking process. For a more detailed
description of this benchmarking workflow, see the <span class="xref myst">Benchmarking Suite README</span>.</p>
|
|
<section id="commands">
|
|
<h3>Commands<a class="headerlink" href="#commands" title="Link to this heading"></a></h3>
|
|
<table class="docutils align-default">
|
|
<thead>
|
|
<tr class="row-odd"><th class="head text-left"><p>Stage</p></th>
|
|
<th class="head"><p>Description</p></th>
|
|
<th class="head"><p>Command</p></th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr class="row-even"><td class="text-left"><p><a class="reference internal" href="#preparing-a-dataset"><span class="xref myst">Dataset</span></a></p></td>
|
|
<td><p>Create a synthetic dataset</p></td>
|
|
<td><p><code class="docutils literal notranslate"><span class="pre">python</span> <span class="pre">benchmarks/cpp/prepare_dataset.py</span> <span class="pre">--tokenizer=$model_name</span> <span class="pre">--stdout</span> <span class="pre">token-norm-dist</span> <span class="pre">--num-requests=$num_requests</span> <span class="pre">--input-mean=$isl</span> <span class="pre">--output-mean=$osl</span> <span class="pre">--input-stdev=0</span> <span class="pre">--output-stdev=0</span> <span class="pre">></span> <span class="pre">$dataset_file</span></code></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td class="text-left"><p><a class="reference internal" href="#engine-building"><span class="xref myst">Build</span></a></p></td>
|
|
<td><p>Build a TensorRT-LLM engine</p></td>
|
|
<td><p><code class="docutils literal notranslate"><span class="pre">trtllm-bench</span> <span class="pre">--model</span> <span class="pre">$model_name</span> <span class="pre">build</span> <span class="pre">--tp_size</span> <span class="pre">$tp_size</span> <span class="pre">--quantization</span> <span class="pre">FP8</span> <span class="pre">--dataset</span> <span class="pre">$dataset_file</span></code></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td class="text-left"><p><a class="reference internal" href="#running-the-benchmark"><span class="xref myst">Run</span></a></p></td>
|
|
<td><p>Run a benchmark with a dataset</p></td>
|
|
<td><p><code class="docutils literal notranslate"><span class="pre">trtllm-bench</span> <span class="pre">--model</span> <span class="pre">$model_name</span> <span class="pre">throughput</span> <span class="pre">--dataset</span> <span class="pre">$dataset_file</span> <span class="pre">--engine_dir</span> <span class="pre">$engine_dir</span></code></p></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</section>
|
|
<section id="variables">
|
|
<h3>Variables<a class="headerlink" href="#variables" title="Link to this heading"></a></h3>
|
|
<table class="docutils align-default">
|
|
<thead>
|
|
<tr class="row-odd"><th class="head text-left"><p>Name</p></th>
|
|
<th class="head"><p>Description</p></th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr class="row-even"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$isl</span></code></p></td>
|
|
<td><p>Benchmark input sequence length.</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$osl</span></code></p></td>
|
|
<td><p>Benchmark output sequence length.</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$tp_size</span></code></p></td>
|
|
<td><p>Number of GPUs to run the benchmark with</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$engine_dir</span></code></p></td>
|
|
<td><p>Location to store built engine file (can be deleted after running benchmarks).</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$model_name</span></code></p></td>
|
|
<td><p>HuggingFace model name, e.g. meta-llama/Llama-2-7b-hf, or the path to a local weights directory.</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$dataset_file</span></code></p></td>
|
|
<td><p>Location of the dataset file generated by <code class="docutils literal notranslate"><span class="pre">prepare_dataset.py</span></code></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$num_requests</span></code></p></td>
|
|
<td><p>The number of requests to generate for dataset generation</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$seq_len</span></code></p></td>
|
|
<td><p>A sequence length of ISL + OSL</p></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</section>
|
|
</section>
|
|
<section id="preparing-a-dataset">
|
|
<h2>Preparing a Dataset<a class="headerlink" href="#preparing-a-dataset" title="Link to this heading"></a></h2>
|
|
<p>In order to prepare a dataset, you can use the provided <a class="reference download internal" download="" href="../_downloads/ea8faa5e98124e92f96b66dc586fb429/prepare_dataset.py"><span class="xref download myst">script</span></a>.
|
|
To generate a synthetic dataset, run the following command:</p>
|
|
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>benchmarks/cpp/prepare_dataset.py<span class="w"> </span>--tokenizer<span class="o">=</span><span class="nv">$model_name</span><span class="w"> </span>--stdout<span class="w"> </span>token-norm-dist<span class="w"> </span>--num-requests<span class="o">=</span><span class="nv">$num_requests</span><span class="w"> </span>--input-mean<span class="o">=</span><span class="nv">$isl</span><span class="w"> </span>--output-mean<span class="o">=</span><span class="nv">$osl</span><span class="w"> </span>--input-stdev<span class="o">=</span><span class="m">0</span><span class="w"> </span>--output-stdev<span class="o">=</span><span class="m">0</span><span class="w"> </span>><span class="w"> </span><span class="nv">$dataset_file</span>
|
|
</pre></div>
|
|
</div>
|
|
<p>The command will generate a text file located at the path specified by <code class="docutils literal notranslate"><span class="pre">$dataset_file</span></code> where all requests are of the same
|
|
input/output sequence length combinations. The script works by using the tokenizer to retrieve the vocabulary size and
|
|
randomly sample token IDs from it to create entirely random sequences. In the command above, all requests will be uniform
|
|
because the standard deviations for both input and output sequences are set to 0.</p>
|
|
<p>For each input and output sequence length combination, the table below details the <code class="docutils literal notranslate"><span class="pre">$num_requests</span></code> that were used. For
|
|
shorter input and output lengths, a larger number of messages were used to guarantee that the system hit a steady state
|
|
because requests enter and exit the system at a much faster rate. For longer input/output sequence lengths, requests
|
|
remain in the system longer and therefore require fewer requests to achieve steady state.</p>
|
|
<table class="docutils align-default">
|
|
<thead>
|
|
<tr class="row-odd"><th class="head"><p>Input Length</p></th>
|
|
<th class="head"><p>Output Length</p></th>
|
|
<th class="head"><p>$seq_len</p></th>
|
|
<th class="head"><p>$num_requests</p></th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr class="row-even"><td><p>128</p></td>
|
|
<td><p>128</p></td>
|
|
<td><p>256</p></td>
|
|
<td><p>30000</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p>128</p></td>
|
|
<td><p>2048</p></td>
|
|
<td><p>2176</p></td>
|
|
<td><p>3000</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>128</p></td>
|
|
<td><p>4096</p></td>
|
|
<td><p>4224</p></td>
|
|
<td><p>1500</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p>2048</p></td>
|
|
<td><p>128</p></td>
|
|
<td><p>2176</p></td>
|
|
<td><p>3000</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>2048</p></td>
|
|
<td><p>2048</p></td>
|
|
<td><p>4096</p></td>
|
|
<td><p>1500</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p>5000</p></td>
|
|
<td><p>500</p></td>
|
|
<td><p>5500</p></td>
|
|
<td><p>1500</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>1000</p></td>
|
|
<td><p>1000</p></td>
|
|
<td><p>2000</p></td>
|
|
<td><p>3000</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p>500</p></td>
|
|
<td><p>2000</p></td>
|
|
<td><p>2500</p></td>
|
|
<td><p>3000</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>20000</p></td>
|
|
<td><p>2000</p></td>
|
|
<td><p>22000</p></td>
|
|
<td><p>1000</p></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</section>
|
|
<section id="engine-building">
|
|
<h2>Engine Building<a class="headerlink" href="#engine-building" title="Link to this heading"></a></h2>
|
|
<p>All engines are built using the <code class="docutils literal notranslate"><span class="pre">trtllm-bench</span> <span class="pre">build</span></code> sub-command. The basic command for FP8 quantized engines is as follows:</p>
|
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>trtllm-bench --model $model_name build --tp_size $tp_size --quantization FP8 --dataset $dataset_file
|
|
</pre></div>
|
|
</div>
|
|
<p>or if you would like to build for a specific sequence length:</p>
|
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>trtllm-bench --model $model_name build --tp_size $tp_size --quantization FP8 --max_seq_length $seq_len
|
|
</pre></div>
|
|
</div>
|
|
<p>If you would like to build an FP16 engine without any quantization, simply remove the <code class="docutils literal notranslate"><span class="pre">--quantization</span> <span class="pre">FP8</span></code> option.</p>
|
|
<blockquote>
|
|
<div><p><strong>Note:</strong> If you specify FP8 quantization, the KV cache will automatically be set to FP8 as well!</p>
|
|
</div></blockquote>
|
|
<p>The <code class="docutils literal notranslate"><span class="pre">trtllm-bench</span> <span class="pre">build</span></code> sub-command will output the path where the engine is located upon a successful build. For example,</p>
|
|
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="o">===========================================================</span>
|
|
ENGINE<span class="w"> </span>SAVED:<span class="w"> </span>/tmp/meta-llama/Llama-2-7b-hf/tp_1_pp_1
|
|
<span class="o">===========================================================</span>
|
|
</pre></div>
|
|
</div>
|
|
</section>
|
|
<section id="running-the-benchmark">
|
|
<h2>Running the Benchmark<a class="headerlink" href="#running-the-benchmark" title="Link to this heading"></a></h2>
|
|
<p>To run the benchmark with the generated data set, simply use the <code class="docutils literal notranslate"><span class="pre">trtllm-bench</span> <span class="pre">throughput</span></code> sub-command. The benchmarker will
|
|
run an offline maximum throughput scenario such that all requests are queued in rapid succession. You simply need to provide
|
|
the path to the engine from the <a class="reference internal" href="#engine-building"><span class="xref myst">build</span></a> phase and a <a class="reference internal" href="#preparing-a-dataset"><span class="xref myst">generated dataset</span></a>.</p>
|
|
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>trtllm-bench<span class="w"> </span>--model<span class="w"> </span><span class="nv">$model_name</span><span class="w"> </span>throughput<span class="w"> </span>--dataset<span class="w"> </span><span class="nv">$dataset_file</span><span class="w"> </span>--engine_dir<span class="w"> </span><span class="nv">$engine_dir</span>
|
|
</pre></div>
|
|
</div>
|
|
<p>The results will be printed to the terminal upon benchmark completion. For example,</p>
|
|
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="o">===========================================================</span>
|
|
<span class="o">=</span><span class="w"> </span>ENGINE<span class="w"> </span><span class="nv">DETAILS</span>
|
|
<span class="o">===========================================================</span>
|
|
Model:<span class="w"> </span>meta-llama/Llama-2-7b-hf
|
|
Engine<span class="w"> </span>Directory:<span class="w"> </span>/tmp/meta-llama/Llama-2-7b-hf/tp_1_pp_1
|
|
TensorRT-LLM<span class="w"> </span>Version:<span class="w"> </span><span class="m">0</span>.12.0
|
|
Dtype:<span class="w"> </span>float16
|
|
KV<span class="w"> </span>Cache<span class="w"> </span>Dtype:<span class="w"> </span>FP8
|
|
Quantization:<span class="w"> </span>FP8
|
|
Max<span class="w"> </span>Input<span class="w"> </span>Length:<span class="w"> </span><span class="m">2048</span>
|
|
Max<span class="w"> </span>Sequence<span class="w"> </span>Length:<span class="w"> </span><span class="nv">4098</span>
|
|
|
|
<span class="o">===========================================================</span>
|
|
<span class="o">=</span><span class="w"> </span>WORLD<span class="w"> </span>+<span class="w"> </span>RUNTIME<span class="w"> </span><span class="nv">INFORMATION</span>
|
|
<span class="o">===========================================================</span>
|
|
TP<span class="w"> </span>Size:<span class="w"> </span><span class="m">1</span>
|
|
PP<span class="w"> </span>Size:<span class="w"> </span><span class="m">1</span>
|
|
Max<span class="w"> </span>Runtime<span class="w"> </span>Batch<span class="w"> </span>Size:<span class="w"> </span><span class="m">4096</span>
|
|
Max<span class="w"> </span>Runtime<span class="w"> </span>Tokens:<span class="w"> </span><span class="m">8192</span>
|
|
Scheduling<span class="w"> </span>Policy:<span class="w"> </span>Guaranteed<span class="w"> </span>No<span class="w"> </span>Evict
|
|
KV<span class="w"> </span>Memory<span class="w"> </span>Percentage:<span class="w"> </span><span class="m">99</span>.0%
|
|
Issue<span class="w"> </span>Rate<span class="w"> </span><span class="o">(</span>req/sec<span class="o">)</span>:<span class="w"> </span><span class="m">3</span>.680275266452667e+18
|
|
<span class="o">===========================================================</span>
|
|
<span class="o">=</span><span class="w"> </span><span class="nv">STATISTICS</span>
|
|
<span class="o">===========================================================</span>
|
|
Number<span class="w"> </span>of<span class="w"> </span>requests:<span class="w"> </span><span class="m">3000</span>
|
|
Average<span class="w"> </span>Input<span class="w"> </span>Length<span class="w"> </span><span class="o">(</span>tokens<span class="o">)</span>:<span class="w"> </span><span class="m">128</span>.0
|
|
Average<span class="w"> </span>Output<span class="w"> </span>Length<span class="w"> </span><span class="o">(</span>tokens<span class="o">)</span>:<span class="w"> </span><span class="m">128</span>.0
|
|
Token<span class="w"> </span>Throughput<span class="w"> </span><span class="o">(</span>tokens/sec<span class="o">)</span>:<span class="w"> </span><span class="m">23405</span>.927228471104
|
|
Request<span class="w"> </span>Throughput<span class="w"> </span><span class="o">(</span>req/sec<span class="o">)</span>:<span class="w"> </span><span class="m">182</span>.8588064724305
|
|
Total<span class="w"> </span>Latency<span class="w"> </span><span class="o">(</span>seconds<span class="o">)</span>:<span class="w"> </span><span class="m">16</span>.406100739
|
|
<span class="o">===========================================================</span>
|
|
</pre></div>
|
|
</div>
|
|
<blockquote>
|
|
<div><p><strong>Warning:</strong> In some cases, the benchmarker may not print anything at all. This behavior usually
|
|
means that the benchmark has hit an out of memory issue. Try reducing the KV cache percentage
|
|
using the <code class="docutils literal notranslate"><span class="pre">--kv_cache_free_gpu_mem_fraction</span></code> option to lower the percentage of used memory.</p>
|
|
</div></blockquote>
|
|
</section>
|
|
</section>
|
|
|
|
|
|
</div>
|
|
</div>
|
|
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
|
<a href="../advanced/expert-parallelism.html" class="btn btn-neutral float-left" title="Expert Parallelism in TensorRT-LLM" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
|
<a href="perf-best-practices.html" class="btn btn-neutral float-right" title="Best Practices for Tuning the Performance of TensorRT-LLM" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
|
</div>
|
|
|
|
<hr/>
|
|
|
|
<div role="contentinfo">
|
|
<!-- Template rendering artifact removed: an uncalled Jinja2 block reference was emitted here as its object repr. -->
|
|
|
|
<div class="footer">
|
|
<p>
|
|
Copyright © 2024 NVIDIA Corporation
|
|
</p>
|
|
<p>
|
|
<a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/privacy-policy/" target="_blank" rel="noopener"
|
|
data-cms-ai="0">Privacy Policy</a> |
|
|
<a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/" target="_blank" rel="noopener"
|
|
data-cms-ai="0">Manage My Privacy</a> |
|
|
<a class="Link" href="https://www.nvidia.com/en-us/preferences/start/" target="_blank" rel="noopener"
|
|
data-cms-ai="0">Do Not Sell or Share My Data</a> |
|
|
<a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/terms-of-service/" target="_blank"
|
|
rel="noopener" data-cms-ai="0">Terms of Service</a> |
|
|
<a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/accessibility/" target="_blank" rel="noopener"
|
|
data-cms-ai="0">Accessibility</a> |
|
|
<a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/company-policies/" target="_blank"
|
|
rel="noopener" data-cms-ai="0">Corporate Policies</a> |
|
|
<a class="Link" href="https://www.nvidia.com/en-us/product-security/" target="_blank" rel="noopener"
|
|
data-cms-ai="0">Product Security</a> |
|
|
<a class="Link" href="https://www.nvidia.com/en-us/contact/" target="_blank" rel="noopener"
|
|
data-cms-ai="0">Contact</a>
|
|
</p>
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
</footer>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
</div>
|
|
<script>
|
|
jQuery(function () {
|
|
SphinxRtdTheme.Navigation.enable(true);
|
|
});
|
|
</script>
|
|
|
|
</body>
|
|
</html> |