mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-22 11:42:41 +08:00
1817 lines
62 KiB
HTML
1817 lines
62 KiB
HTML
<!DOCTYPE html>
|
|
<html class="writer-html5" lang="en" data-content_root="../">
|
|
<head>
|
|
<meta charset="utf-8" />
|
|
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
<title>Overview — tensorrt_llm documentation</title>
|
|
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=80d5e7a1" />
|
|
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=19f00094" />
|
|
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" />
|
|
|
|
|
|
<!--[if lt IE 9]>
|
|
<script src="../_static/js/html5shiv.min.js"></script>
|
|
<![endif]-->
|
|
|
|
<script src="../_static/jquery.js?v=5d32c60e"></script>
|
|
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
<script src="../_static/documentation_options.js?v=5929fcd5"></script>
|
|
<script src="../_static/doctools.js?v=888ff710"></script>
|
|
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
<script src="../_static/clipboard.min.js?v=a7894cd8"></script>
|
|
<script src="../_static/copybutton.js?v=f281be69"></script>
|
|
<script src="../_static/js/theme.js"></script>
|
|
<link rel="index" title="Index" href="../genindex.html" />
|
|
<link rel="search" title="Search" href="../search.html" />
|
|
<link rel="next" title="TensorRT-LLM Benchmarking" href="perf-benchmarking.html" />
|
|
<link rel="prev" title="Speculative Sampling" href="../advanced/speculative-decoding.html" />
|
|
</head>
|
|
|
|
<body class="wy-body-for-nav">
|
|
<div class="wy-grid-for-nav">
|
|
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
<div class="wy-side-scroll">
|
|
<div class="wy-side-nav-search" >
|
|
|
|
|
|
|
|
<a href="../index.html" class="icon icon-home">
|
|
tensorrt_llm
|
|
</a>
|
|
<div role="search">
|
|
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
|
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
<input type="hidden" name="check_keywords" value="yes" />
|
|
<input type="hidden" name="area" value="default" />
|
|
</form>
|
|
</div>
|
|
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
<p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../overview.html">Overview</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../quick-start-guide.html">Quick Start Guide</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../key-features.html">Key Features</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../release-notes.html">Release Notes</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Installation</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../installation/linux.html">Installing on Linux</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../installation/build-from-source-linux.html">Building from Source Code on Linux</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../installation/windows.html">Installing on Windows</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../installation/build-from-source-windows.html">Building from Source Code on Windows</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">LLM API</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../llm-api/index.html">API Introduction</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../llm-api/reference.html">API Reference</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">LLM API Examples</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../llm-api-examples/index.html">LLM Examples Introduction</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../llm-api-examples/customization.html">Common Customizations</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../llm-api-examples/llm_api_examples.html">Examples</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Model Definition API</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.layers.html">Layers</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.functional.html">Functionals</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.models.html">Models</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.plugin.html">Plugin</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.quantization.html">Quantization</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.runtime.html">Runtime</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">C++ API</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../_cpp_gen/executor.html">Executor</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../_cpp_gen/runtime.html">Runtime</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Command-Line Reference</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../commands/trtllm-build.html">trtllm-build</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Architecture</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/overview.html">TensorRT-LLM Architecture</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/core-concepts.html">Model Definition</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/core-concepts.html#compilation">Compilation</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/core-concepts.html#runtime">Runtime</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/core-concepts.html#multi-gpu-and-multi-node-support">Multi-GPU and Multi-Node Support</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/checkpoint.html">TensorRT-LLM Checkpoint</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/workflow.html">TensorRT-LLM Build Workflow</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../architecture/add-model.html">Adding a Model</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Advanced</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/gpt-attention.html">Multi-Head, Multi-Query, and Group-Query Attention</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/gpt-runtime.html">C++ GPT Runtime</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/executor.html">Executor API</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/graph-rewriting.html">Graph Rewriting Module</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/batch-manager.html">The Batch Manager in TensorRT-LLM</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/inference-request.html">Inference Request</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/inference-request.html#responses">Responses</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/lora.html">Run gpt-2b + LoRA using GptManager / cpp runtime</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/expert-parallelism.html">Expert Parallelism in TensorRT-LLM</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/kv-cache-reuse.html">KV cache reuse</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/speculative-decoding.html">Speculative Sampling</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../advanced/speculative-decoding.html#lookahead-decoding">Lookahead decoding</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Performance</span></p>
|
|
<ul class="current">
|
|
<li class="toctree-l1 current"><a class="current reference internal" href="#">Overview</a><ul>
|
|
<li class="toctree-l2"><a class="reference internal" href="#known-issues">Known Issues</a><ul>
|
|
<li class="toctree-l3"><a class="reference internal" href="#fused-matmul-gated-silu-llama">Fused Matmul + Gated-SiLU (LLaMA)</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l2"><a class="reference internal" href="#throughput-measurements">Throughput Measurements</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="#reproducing-benchmarked-results">Reproducing Benchmarked Results</a><ul>
|
|
<li class="toctree-l3"><a class="reference internal" href="#commands">Commands</a></li>
|
|
<li class="toctree-l3"><a class="reference internal" href="#variables">Variables</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l2"><a class="reference internal" href="#preparing-a-dataset">Preparing a Dataset</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="#engine-building">Engine Building</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="#running-the-benchmark">Running the Benchmark</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l1"><a class="reference internal" href="perf-benchmarking.html">Benchmarking</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="perf-best-practices.html">Best Practices</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="perf-analysis.html">Performance Analysis</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Reference</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../reference/troubleshooting.html">Troubleshooting</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../reference/support-matrix.html">Support Matrix</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../reference/precision.html">Numerical Precision</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../reference/memory.html">Memory Usage of TensorRT-LLM</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Blogs</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../blogs/H100vsA100.html">H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../blogs/H200launch.html">H200 achieves nearly 12,000 tokens/sec on Llama2-13B with TensorRT-LLM</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../blogs/Falcon180B-H200.html">Falcon-180B on a single H200 GPU with INT4 AWQ, and 6.7x faster Llama-70B over A100</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../blogs/quantization-in-TRT-LLM.html">Speed up inference with SOTA quantization techniques in TRT-LLM</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../blogs/XQA-kernel.html">New XQA-kernel provides 2.4x more Llama-70B throughput within the same latency budget</a></li>
|
|
</ul>
|
|
|
|
</div>
|
|
</div>
|
|
</nav>
|
|
|
|
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
<a href="../index.html">tensorrt_llm</a>
|
|
</nav>
|
|
|
|
<div class="wy-nav-content">
|
|
<div class="rst-content">
|
|
<div role="navigation" aria-label="Page navigation">
|
|
<ul class="wy-breadcrumbs">
|
|
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
<li class="breadcrumb-item active">Overview</li>
|
|
<li class="wy-breadcrumbs-aside">
|
|
<a href="../_sources/performance/perf-overview.md.txt" rel="nofollow"> View page source</a>
|
|
</li>
|
|
</ul>
|
|
<hr/>
|
|
</div>
|
|
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
<div itemprop="articleBody">
|
|
|
|
<blockquote id="perf-overview">
|
|
<div><p><strong>Important:</strong>
|
|
As of TensorRT-LLM v0.10, these performance benchmarks have changed methodology to utilize in-flight batching and
|
|
no longer utilize static benchmarking. These numbers are initial measurements and are expected to improve in future
|
|
releases.</p>
|
|
</div></blockquote>
|
|
<section id="overview">
|
|
<h1>Overview<a class="headerlink" href="#overview" title="Link to this heading"></a></h1>
|
|
<p>This document summarizes performance measurements of TensorRT-LLM on H200 and H100
|
|
(Hopper), L40S (Ada) and A100 (Ampere) GPUs for a few key models.</p>
|
|
<p>The data in the following tables is provided as a reference point to help users
|
|
validate observed performance. It should not be considered as the peak
|
|
performance that can be delivered by TensorRT-LLM.</p>
|
|
<section id="known-issues">
|
|
<h2>Known Issues<a class="headerlink" href="#known-issues" title="Link to this heading"></a></h2>
|
|
<p>The following issues are being addressed to improve the efficiency of TensorRT-LLM.</p>
|
|
<section id="fused-matmul-gated-silu-llama">
|
|
<h3>Fused Matmul + Gated-SiLU (LLaMA)<a class="headerlink" href="#fused-matmul-gated-silu-llama" title="Link to this heading"></a></h3>
|
|
<p>The current implementation combines two Matmul operations into one Matmul followed by
|
|
a separate SwiGLU kernel (when <code class="docutils literal notranslate"><span class="pre">--use_fused_mlp=enable</span></code> is specified). There is also a more
|
|
efficient implementation that runs a single fused Matmul + SwiGLU kernel for FP8 on Hopper
|
|
(when <code class="docutils literal notranslate"><span class="pre">--use_fused_mlp=enable</span> <span class="pre">--gemm_swiglu_plugin</span> <span class="pre">fp8</span></code> is specified). The gemm_swiglu_plugin
|
|
will support more data types and GPU architectures in a future release.</p>
|
|
</section>
|
|
</section>
|
|
<section id="throughput-measurements">
|
|
<h2>Throughput Measurements<a class="headerlink" href="#throughput-measurements" title="Link to this heading"></a></h2>
|
|
<p>The table below shows performance data where a local inference client is fed requests at an infinite rate (no delay between messages),
|
|
and shows the throughput of a client-server scenario under maximum load.</p>
|
|
<p>The performance numbers below were collected using the steps described in this document.</p>
|
|
<p><strong>All data in the table below was generated using version 0.14.0 and presents token throughput in tokens/second.</strong></p>
|
|
<table class="docutils align-default">
|
|
<thead>
|
|
<tr class="row-odd"><th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
<th class="head"><p></p></th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p><strong>GPU</strong></p></td>
|
|
<td><p><strong>H200 141GB HBM3</strong></p></td>
|
|
<td><p><strong>H100 80GB HBM3</strong></p></td>
|
|
<td><p><strong>H100 80GB HBM3</strong></p></td>
|
|
<td><p><strong>A100-SXM4-80GB</strong></p></td>
|
|
<td><p><strong>A100-PCIE-80GB</strong></p></td>
|
|
<td><p><strong>L40S</strong></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p><strong>Precision</strong></p></td>
|
|
<td><p><strong>FP8</strong></p></td>
|
|
<td><p><strong>FP8</strong></p></td>
|
|
<td><p><strong>FP16</strong></p></td>
|
|
<td><p><strong>FP16</strong></p></td>
|
|
<td><p><strong>FP16</strong></p></td>
|
|
<td><p><strong>FP8</strong></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p><strong>Model</strong></p></td>
|
|
<td><p><strong>Input/Output Lengths</strong></p></td>
|
|
<td><p><strong>TP Size</strong></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p>LLaMA v3 70B</p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2594.2199</p></td>
|
|
<td><p>464.5243</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>4574.1197</p></td>
|
|
<td><p>4092.3267</p></td>
|
|
<td><p>776.9965</p></td>
|
|
<td><p>468.5805</p></td>
|
|
<td><p>259.1155</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>7612.2487</p></td>
|
|
<td><p>6925.0844</p></td>
|
|
<td><p>3730.2064</p></td>
|
|
<td><p>1765.9123</p></td>
|
|
<td><p>987.1971</p></td>
|
|
<td><p>1159.357</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>13075.5194</p></td>
|
|
<td><p>10733.0804</p></td>
|
|
<td><p>5963.0914</p></td>
|
|
<td><p>3054.8915</p></td>
|
|
<td><p>960.3737</p></td>
|
|
<td><p>1173.3517</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3904.1639</p></td>
|
|
<td><p>2551.6384</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>5343.8677</p></td>
|
|
<td><p>5191.7428</p></td>
|
|
<td><p>3183.9714</p></td>
|
|
<td><p>1334.903</p></td>
|
|
<td><p>806.1477</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>8829.1049</p></td>
|
|
<td><p>8540.5362</p></td>
|
|
<td><p>5837.9598</p></td>
|
|
<td><p>2421.4383</p></td>
|
|
<td><p>1275.5474</p></td>
|
|
<td><p>1427.9115</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>16359.1322</p></td>
|
|
<td><p>15498.2004</p></td>
|
|
<td><p>10597.6556</p></td>
|
|
<td><p>4474.1621</p></td>
|
|
<td><p>1223.1747</p></td>
|
|
<td><p>1377.473</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3613.7474</p></td>
|
|
<td><p>418.3639</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>7112.2959</p></td>
|
|
<td><p>5852.0185</p></td>
|
|
<td><p>817.52</p></td>
|
|
<td><p>511.6257</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>12772.8148</p></td>
|
|
<td><p>8998.3742</p></td>
|
|
<td><p>5072.0345</p></td>
|
|
<td><p>2484.2018</p></td>
|
|
<td><p>1471.9105</p></td>
|
|
<td><p>1771.4437</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>19722.5974</p></td>
|
|
<td><p>15099.0633</p></td>
|
|
<td><p>7554.2141</p></td>
|
|
<td><p>4463.6602</p></td>
|
|
<td><p>1589.1759</p></td>
|
|
<td><p>1953.7918</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2409.6881</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>5687.3482</p></td>
|
|
<td><p>3513.0941</p></td>
|
|
<td><p>413.3767</p></td>
|
|
<td><p>273.5871</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>8937.3115</p></td>
|
|
<td><p>6718.5895</p></td>
|
|
<td><p>3093.7358</p></td>
|
|
<td><p>1688.0132</p></td>
|
|
<td><p>1231.8104</p></td>
|
|
<td><p>1279.2496</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>13976.1386</p></td>
|
|
<td><p>9279.1013</p></td>
|
|
<td><p>5001.2743</p></td>
|
|
<td><p>2948.5374</p></td>
|
|
<td><p>1350.794</p></td>
|
|
<td><p>1494.0776</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>457.5772</p></td>
|
|
<td><p>241.7561</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>699.5582</p></td>
|
|
<td><p>690.9961</p></td>
|
|
<td><p>328.0399</p></td>
|
|
<td><p>145.088</p></td>
|
|
<td><p>91.1746</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>1035.6523</p></td>
|
|
<td><p>1008.8318</p></td>
|
|
<td><p>670.6725</p></td>
|
|
<td><p>278.5717</p></td>
|
|
<td><p>150.2619</p></td>
|
|
<td><p>168.7886</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2055.7245</p></td>
|
|
<td><p>1996.2653</p></td>
|
|
<td><p>1288.7599</p></td>
|
|
<td><p>546.9599</p></td>
|
|
<td><p>140.0144</p></td>
|
|
<td><p>160.2741</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>1802.1116</p></td>
|
|
<td><p>204.0931</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>3487.2497</p></td>
|
|
<td><p>2444.6903</p></td>
|
|
<td><p>165.6522</p></td>
|
|
<td><p>126.1101</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>6126.7196</p></td>
|
|
<td><p>4850.8285</p></td>
|
|
<td><p>2386.6556</p></td>
|
|
<td><p>1230.1833</p></td>
|
|
<td><p>822.2269</p></td>
|
|
<td><p>876.6085</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>9784.0193</p></td>
|
|
<td><p>7432.6659</p></td>
|
|
<td><p>3991.2123</p></td>
|
|
<td><p>2144.3042</p></td>
|
|
<td><p>883.4809</p></td>
|
|
<td><p>994.94</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2822.7846</p></td>
|
|
<td><p>389.8823</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>6175.7623</p></td>
|
|
<td><p>4601.857</p></td>
|
|
<td><p>687.5386</p></td>
|
|
<td><p>430.6093</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>10783.8925</p></td>
|
|
<td><p>9018.9053</p></td>
|
|
<td><p>3698.3674</p></td>
|
|
<td><p>2113.3936</p></td>
|
|
<td><p>1248.8319</p></td>
|
|
<td><p>1468.7827</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>17631.9756</p></td>
|
|
<td><p>11375.9582</p></td>
|
|
<td><p>6321.3679</p></td>
|
|
<td><p>3673.5693</p></td>
|
|
<td><p>1321.8541</p></td>
|
|
<td><p>1636.4588</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>532.2603</p></td>
|
|
<td><p>123.8543</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>931.8255</p></td>
|
|
<td><p>897.4263</p></td>
|
|
<td><p>227.9005</p></td>
|
|
<td><p>117.5698</p></td>
|
|
<td><p>75.35</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>1399.7865</p></td>
|
|
<td><p>1316.2865</p></td>
|
|
<td><p>831.2804</p></td>
|
|
<td><p>362.3465</p></td>
|
|
<td><p>209.8052</p></td>
|
|
<td><p>234.7343</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2725.1283</p></td>
|
|
<td><p>2469.5585</p></td>
|
|
<td><p>1446.3508</p></td>
|
|
<td><p>662.5725</p></td>
|
|
<td><p>202.0719</p></td>
|
|
<td><p>231.9027</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p>LLaMA v3.1 405B</p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>3391.0372</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>3766.2785</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>5952.1416</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>3944.117</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>20000/2000</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>481.5732</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>444.5735</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2604.8557</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>4805.86</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>655.9754</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>LLaMA v3.1 70B</p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2585.0953</p></td>
|
|
<td><p>410.286</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>4600.9616</p></td>
|
|
<td><p>4116.4444</p></td>
|
|
<td><p>785.4931</p></td>
|
|
<td><p>468.6383</p></td>
|
|
<td><p>257.972</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>7607.5304</p></td>
|
|
<td><p>6932.8808</p></td>
|
|
<td><p>3774.676</p></td>
|
|
<td><p>1762.6831</p></td>
|
|
<td><p>989.4082</p></td>
|
|
<td><p>1161.4814</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>13081.434</p></td>
|
|
<td><p>10730.156</p></td>
|
|
<td><p>5978.4573</p></td>
|
|
<td><p>3190.0211</p></td>
|
|
<td><p>959.8463</p></td>
|
|
<td><p>1188.1193</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3897.2623</p></td>
|
|
<td><p>2459.6003</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>5357.0227</p></td>
|
|
<td><p>5194.8171</p></td>
|
|
<td><p>3207.2866</p></td>
|
|
<td><p>1346.9692</p></td>
|
|
<td><p>806.7215</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>8826.9618</p></td>
|
|
<td><p>8542.3012</p></td>
|
|
<td><p>5846.8413</p></td>
|
|
<td><p>2420.8665</p></td>
|
|
<td><p>1272.6755</p></td>
|
|
<td><p>1438.0446</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>16382.9807</p></td>
|
|
<td><p>15533.1169</p></td>
|
|
<td><p>10649.4968</p></td>
|
|
<td><p>4572.3445</p></td>
|
|
<td><p>1212.0566</p></td>
|
|
<td><p>1381.7051</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3612.2603</p></td>
|
|
<td><p>445.7773</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>7054.7235</p></td>
|
|
<td><p>5869.3998</p></td>
|
|
<td><p>822.1912</p></td>
|
|
<td><p>483.1299</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>12763.4114</p></td>
|
|
<td><p>9017.4377</p></td>
|
|
<td><p>4982.6225</p></td>
|
|
<td><p>2492.4036</p></td>
|
|
<td><p>1435.236</p></td>
|
|
<td><p>1763.522</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>19266.0398</p></td>
|
|
<td><p>15190.1652</p></td>
|
|
<td><p>7605.5295</p></td>
|
|
<td><p>4254.2871</p></td>
|
|
<td><p>1609.2473</p></td>
|
|
<td><p>1944.1251</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2415.1981</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>5671.9561</p></td>
|
|
<td><p>3518.782</p></td>
|
|
<td><p>419.0178</p></td>
|
|
<td><p>272.9137</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>8939.8227</p></td>
|
|
<td><p>6431.2702</p></td>
|
|
<td><p>3083.8794</p></td>
|
|
<td><p>1685.9677</p></td>
|
|
<td><p>1212.5416</p></td>
|
|
<td><p>1280.3778</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>13974.2854</p></td>
|
|
<td><p>9168.709</p></td>
|
|
<td><p>4981.9765</p></td>
|
|
<td><p>3067.5452</p></td>
|
|
<td><p>1310.091</p></td>
|
|
<td><p>1499.2441</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>20000/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>240.7202</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>614.318</p></td>
|
|
<td><p>397.6801</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>1030.9528</p></td>
|
|
<td><p>851.8542</p></td>
|
|
<td><p>369.4269</p></td>
|
|
<td><p>179.5181</p></td>
|
|
<td><p>126.7676</p></td>
|
|
<td><p>140.5565</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>1898.9762</p></td>
|
|
<td><p>1354.5333</p></td>
|
|
<td><p></p></td>
|
|
<td><p>362.9368</p></td>
|
|
<td><p>156.5767</p></td>
|
|
<td><p>141.1584</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>458.1948</p></td>
|
|
<td><p>244.1842</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>692.3911</p></td>
|
|
<td><p>697.3907</p></td>
|
|
<td><p>322.7016</p></td>
|
|
<td><p>144.7921</p></td>
|
|
<td><p>95.0306</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>1034.5773</p></td>
|
|
<td><p>1001.0771</p></td>
|
|
<td><p>688.0344</p></td>
|
|
<td><p>278.4018</p></td>
|
|
<td><p>150.6795</p></td>
|
|
<td><p>169.0386</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2070.8157</p></td>
|
|
<td><p>1966.6072</p></td>
|
|
<td><p>1316.3086</p></td>
|
|
<td><p>550.4751</p></td>
|
|
<td><p>142.6166</p></td>
|
|
<td><p>163.6749</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>1797.6743</p></td>
|
|
<td><p>209.1707</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>3518.0774</p></td>
|
|
<td><p>2445.0093</p></td>
|
|
<td><p>166.792</p></td>
|
|
<td><p>126.1127</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>6112.9026</p></td>
|
|
<td><p>4838.5272</p></td>
|
|
<td><p>2393.1359</p></td>
|
|
<td><p>1231.0359</p></td>
|
|
<td><p>823.4777</p></td>
|
|
<td><p>876.2254</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>9716.1934</p></td>
|
|
<td><p>7434.8117</p></td>
|
|
<td><p>4023.6978</p></td>
|
|
<td><p>2171.5323</p></td>
|
|
<td><p>858.6602</p></td>
|
|
<td><p>1001.3649</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>2826.6665</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>6106.5855</p></td>
|
|
<td><p>4605.9226</p></td>
|
|
<td><p>700.5415</p></td>
|
|
<td><p>430.6129</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>10816.8283</p></td>
|
|
<td><p>9205.3766</p></td>
|
|
<td><p>3781.082</p></td>
|
|
<td><p>2096.2441</p></td>
|
|
<td><p>1176.418</p></td>
|
|
<td><p>1470.0826</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>17693.705</p></td>
|
|
<td><p>13109.4437</p></td>
|
|
<td><p>6205.2658</p></td>
|
|
<td><p>3486.7891</p></td>
|
|
<td><p>1306.35</p></td>
|
|
<td><p>1639.2778</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>533.6128</p></td>
|
|
<td><p>125.4236</p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>936.7014</p></td>
|
|
<td><p>886.6758</p></td>
|
|
<td><p>228.874</p></td>
|
|
<td><p>116.9529</p></td>
|
|
<td><p>76.1601</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>1386.4827</p></td>
|
|
<td><p>1313.893</p></td>
|
|
<td><p>849.1091</p></td>
|
|
<td><p>362.9361</p></td>
|
|
<td><p>209.2045</p></td>
|
|
<td><p>236.117</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2711.5057</p></td>
|
|
<td><p>2444.9643</p></td>
|
|
<td><p>1420.5163</p></td>
|
|
<td><p>670.3742</p></td>
|
|
<td><p>203.8008</p></td>
|
|
<td><p>230.3084</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>LLaMA v3.1 8B</p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>16414.6988</p></td>
|
|
<td><p>14108.0361</p></td>
|
|
<td><p>7054.5156</p></td>
|
|
<td><p>3634.3886</p></td>
|
|
<td><p>3165.3542</p></td>
|
|
<td><p>3726.7552</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>27778.8885</p></td>
|
|
<td><p>26933.1886</p></td>
|
|
<td><p>15571.6549</p></td>
|
|
<td><p>6701.7958</p></td>
|
|
<td><p>5338.0166</p></td>
|
|
<td><p>8639.7933</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>22948.5383</p></td>
|
|
<td><p>18995.2523</p></td>
|
|
<td><p>9150.7477</p></td>
|
|
<td><p>4963.4443</p></td>
|
|
<td><p>4250.6391</p></td>
|
|
<td><p>5101.6652</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>15583.3035</p></td>
|
|
<td><p>11815.449</p></td>
|
|
<td><p>5368.9227</p></td>
|
|
<td><p>3011.3335</p></td>
|
|
<td><p>2568.5398</p></td>
|
|
<td><p>2774.5363</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>20000/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>1649.5453</p></td>
|
|
<td><p>1301.4754</p></td>
|
|
<td><p>562.8735</p></td>
|
|
<td><p>316.533</p></td>
|
|
<td><p>291.4776</p></td>
|
|
<td><p>270.5404</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3619.4309</p></td>
|
|
<td><p>3460.3545</p></td>
|
|
<td><p>1904.3259</p></td>
|
|
<td><p>795.389</p></td>
|
|
<td><p>611.8446</p></td>
|
|
<td><p>986.9134</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>11032.9729</p></td>
|
|
<td><p>8777.6623</p></td>
|
|
<td><p>4159.6857</p></td>
|
|
<td><p>2264.9513</p></td>
|
|
<td><p>2011.1215</p></td>
|
|
<td><p>2018.303</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>19510.4015</p></td>
|
|
<td><p>14993.328</p></td>
|
|
<td><p>7498.3331</p></td>
|
|
<td><p>3945.1912</p></td>
|
|
<td><p>3374.7133</p></td>
|
|
<td><p>4065.3921</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3787.6721</p></td>
|
|
<td><p>3258.2001</p></td>
|
|
<td><p>1708.0353</p></td>
|
|
<td><p>790.6631</p></td>
|
|
<td><p>703.56</p></td>
|
|
<td><p>855.9822</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p>Mistral 7B</p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>17739.1436</p></td>
|
|
<td><p>14986.7562</p></td>
|
|
<td><p>7697.1418</p></td>
|
|
<td><p>3804.5585</p></td>
|
|
<td><p>3333.4754</p></td>
|
|
<td><p>3981.4799</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>30094.9137</p></td>
|
|
<td><p>29341.284</p></td>
|
|
<td><p>16238.937</p></td>
|
|
<td><p>6914.2184</p></td>
|
|
<td><p>5491.7418</p></td>
|
|
<td><p>9127.5052</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>24671.5477</p></td>
|
|
<td><p>20941.6631</p></td>
|
|
<td><p>9708.1161</p></td>
|
|
<td><p>5303.4318</p></td>
|
|
<td><p>4402.3044</p></td>
|
|
<td><p>5357.3405</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>16454.0833</p></td>
|
|
<td><p>12780.3724</p></td>
|
|
<td><p>5800.4957</p></td>
|
|
<td><p>3235.0678</p></td>
|
|
<td><p>2825.7896</p></td>
|
|
<td><p>2879.9833</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>20000/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>1676.0415</p></td>
|
|
<td><p>1317.9654</p></td>
|
|
<td><p>569.7589</p></td>
|
|
<td><p>324.5936</p></td>
|
|
<td><p>281.4751</p></td>
|
|
<td><p>286.353</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3649.1462</p></td>
|
|
<td><p>3492.3042</p></td>
|
|
<td><p>1929.3126</p></td>
|
|
<td><p>800.9286</p></td>
|
|
<td><p>617.0932</p></td>
|
|
<td><p>1019.75</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>11403.6968</p></td>
|
|
<td><p>8974.7383</p></td>
|
|
<td><p>4367.8733</p></td>
|
|
<td><p>2331.8112</p></td>
|
|
<td><p>1988.3496</p></td>
|
|
<td><p>2184.3861</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>20819.4592</p></td>
|
|
<td><p>15992.3357</p></td>
|
|
<td><p>7947.4257</p></td>
|
|
<td><p>4189.395</p></td>
|
|
<td><p>3603.4489</p></td>
|
|
<td><p>4286.3867</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>1</p></td>
|
|
<td><p>3840.0108</p></td>
|
|
<td><p>3340.7385</p></td>
|
|
<td><p>1707.2611</p></td>
|
|
<td><p>807.4561</p></td>
|
|
<td><p>722.8385</p></td>
|
|
<td><p>881.7336</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>Mixtral 8x22B</p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>18557.43</p></td>
|
|
<td><p>16918.03</p></td>
|
|
<td><p>9759.888</p></td>
|
|
<td><p>4753.6273</p></td>
|
|
<td><p></p></td>
|
|
<td><p>2128.4403</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>25179.4765</p></td>
|
|
<td><p>23729.5293</p></td>
|
|
<td><p>16421.3182</p></td>
|
|
<td><p>6948.5923</p></td>
|
|
<td><p></p></td>
|
|
<td><p>2488.6297</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>27492.4926</p></td>
|
|
<td><p>24556.7807</p></td>
|
|
<td><p>12303.4168</p></td>
|
|
<td><p>7246.7172</p></td>
|
|
<td><p></p></td>
|
|
<td><p>3540.0067</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>19718.8648</p></td>
|
|
<td><p>17755.0018</p></td>
|
|
<td><p>7474.3817</p></td>
|
|
<td><p>4696.6123</p></td>
|
|
<td><p></p></td>
|
|
<td><p>2568.3114</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>20000/2000</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>2897.182</p></td>
|
|
<td><p>2189.606</p></td>
|
|
<td><p>1118.8294</p></td>
|
|
<td><p>594.8509</p></td>
|
|
<td><p></p></td>
|
|
<td><p>309.0799</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>3093.8418</p></td>
|
|
<td><p>2917.1362</p></td>
|
|
<td><p>1994.0127</p></td>
|
|
<td><p>825.3934</p></td>
|
|
<td><p></p></td>
|
|
<td><p>294.7706</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>13795.9827</p></td>
|
|
<td><p>12487.6502</p></td>
|
|
<td><p>5857.8831</p></td>
|
|
<td><p>3377.8371</p></td>
|
|
<td><p></p></td>
|
|
<td><p>1694.6176</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>24637.473</p></td>
|
|
<td><p>19997.3914</p></td>
|
|
<td><p>10637.6598</p></td>
|
|
<td><p>6007.619</p></td>
|
|
<td><p></p></td>
|
|
<td><p>2976.9633</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>3889.2745</p></td>
|
|
<td><p>3578.4843</p></td>
|
|
<td><p>2211.2377</p></td>
|
|
<td><p>1028.3843</p></td>
|
|
<td><p></p></td>
|
|
<td><p>420.2156</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p>Mixtral 8x7B</p></td>
|
|
<td><p>1000/1000</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>18712.2046</p></td>
|
|
<td><p>15931.8663</p></td>
|
|
<td><p>6052.876</p></td>
|
|
<td><p>3276.6186</p></td>
|
|
<td><p>1907.8817</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>32834.0923</p></td>
|
|
<td><p>28015.1981</p></td>
|
|
<td><p>15509.1538</p></td>
|
|
<td><p>7357.1613</p></td>
|
|
<td><p>4737.0179</p></td>
|
|
<td><p>5060.8399</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>44410.7533</p></td>
|
|
<td><p>40573.0499</p></td>
|
|
<td><p>27684.9381</p></td>
|
|
<td><p>13948.1533</p></td>
|
|
<td><p>4970.9287</p></td>
|
|
<td><p>5725.9638</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/128</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>24970.5594</p></td>
|
|
<td><p>24321.9927</p></td>
|
|
<td><p>15334.2103</p></td>
|
|
<td><p>5915.3897</p></td>
|
|
<td><p>3810.1846</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>42500.5855</p></td>
|
|
<td><p>40182.7271</p></td>
|
|
<td><p>27718.9857</p></td>
|
|
<td><p>11328.7486</p></td>
|
|
<td><p>6026.9206</p></td>
|
|
<td><p>6769.9441</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>54304.0436</p></td>
|
|
<td><p>51030.9048</p></td>
|
|
<td><p>40119.3268</p></td>
|
|
<td><p>17918.1146</p></td>
|
|
<td><p>5573.7682</p></td>
|
|
<td><p>6422.4308</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>128/2048</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>29314.1475</p></td>
|
|
<td><p>20945.7816</p></td>
|
|
<td><p>7409.9253</p></td>
|
|
<td><p>4284.3035</p></td>
|
|
<td><p>2248.1815</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>52680.8353</p></td>
|
|
<td><p>40668.5928</p></td>
|
|
<td><p>21293.1761</p></td>
|
|
<td><p>10929.0182</p></td>
|
|
<td><p>7353.7405</p></td>
|
|
<td><p>7506.7612</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>70409.1968</p></td>
|
|
<td><p>64529.9982</p></td>
|
|
<td><p>40839.3077</p></td>
|
|
<td><p>21058.2144</p></td>
|
|
<td><p>8866.251</p></td>
|
|
<td><p>9907.6896</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>128/4096</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>21520.4385</p></td>
|
|
<td><p>12070.6724</p></td>
|
|
<td><p>3928.6678</p></td>
|
|
<td><p>2302.964</p></td>
|
|
<td><p>1171.966</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>32550.5267</p></td>
|
|
<td><p>29120.2002</p></td>
|
|
<td><p>11678.0071</p></td>
|
|
<td><p>6538.1511</p></td>
|
|
<td><p>5176.9632</p></td>
|
|
<td><p>4958.7004</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>40373.4857</p></td>
|
|
<td><p>36357.7861</p></td>
|
|
<td><p>21628.821</p></td>
|
|
<td><p>13565.7778</p></td>
|
|
<td><p>7209.2336</p></td>
|
|
<td><p>8271.7938</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>20000/2000</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>2204.1378</p></td>
|
|
<td><p>1659.5907</p></td>
|
|
<td><p>622.2717</p></td>
|
|
<td><p>321.9839</p></td>
|
|
<td><p>185.6671</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>4047.7473</p></td>
|
|
<td><p>3290.9457</p></td>
|
|
<td><p>1602.0208</p></td>
|
|
<td><p>778.7285</p></td>
|
|
<td><p>572.4282</p></td>
|
|
<td><p>587.1759</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>6561.6849</p></td>
|
|
<td><p>5328.5261</p></td>
|
|
<td><p>3113.2047</p></td>
|
|
<td><p>1645.8114</p></td>
|
|
<td><p>750.5372</p></td>
|
|
<td><p>828.8471</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>2048/128</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>2958.0873</p></td>
|
|
<td><p>2883.5166</p></td>
|
|
<td><p>1796.5451</p></td>
|
|
<td><p>687.7251</p></td>
|
|
<td><p>465.1585</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>5229.8744</p></td>
|
|
<td><p>4972.6818</p></td>
|
|
<td><p>3354.994</p></td>
|
|
<td><p>1351.7191</p></td>
|
|
<td><p>728.4943</p></td>
|
|
<td><p>812.0143</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>7030.9766</p></td>
|
|
<td><p>6532.721</p></td>
|
|
<td><p>5025.3047</p></td>
|
|
<td><p>2248.6418</p></td>
|
|
<td><p>677.9886</p></td>
|
|
<td><p>771.3656</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>2048/2048</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>13842.834</p></td>
|
|
<td><p>9334.0732</p></td>
|
|
<td><p>3503.0218</p></td>
|
|
<td><p>1997.1923</p></td>
|
|
<td><p>1060.8946</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>22389.4914</p></td>
|
|
<td><p>20185.8212</p></td>
|
|
<td><p>9143.2741</p></td>
|
|
<td><p>4963.8758</p></td>
|
|
<td><p>3520.3659</p></td>
|
|
<td><p>3453.8076</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>28975.322</p></td>
|
|
<td><p>26176.9163</p></td>
|
|
<td><p>19291.8278</p></td>
|
|
<td><p>10552.9732</p></td>
|
|
<td><p>4590.187</p></td>
|
|
<td><p>4929.7228</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p>500/2000</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>23459.0411</p></td>
|
|
<td><p>18185.6392</p></td>
|
|
<td><p>6023.3308</p></td>
|
|
<td><p>3438.6964</p></td>
|
|
<td><p>1817.11</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>39971.0236</p></td>
|
|
<td><p>31693.8787</p></td>
|
|
<td><p>17087.037</p></td>
|
|
<td><p>8930.3495</p></td>
|
|
<td><p>6117.5624</p></td>
|
|
<td><p>6434.9178</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>60721.462</p></td>
|
|
<td><p>48842.8084</p></td>
|
|
<td><p>31358.2791</p></td>
|
|
<td><p>17034.706</p></td>
|
|
<td><p>7118.0767</p></td>
|
|
<td><p>8130.8026</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p>5000/500</p></td>
|
|
<td><p>2</p></td>
|
|
<td><p>3742.5293</p></td>
|
|
<td><p>3563.8228</p></td>
|
|
<td><p>1648.9041</p></td>
|
|
<td><p>733.1921</p></td>
|
|
<td><p>448.6716</p></td>
|
|
<td><p></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>4</p></td>
|
|
<td><p>6602.3877</p></td>
|
|
<td><p>6020.6267</p></td>
|
|
<td><p>3543.6819</p></td>
|
|
<td><p>1603.8223</p></td>
|
|
<td><p>948.0567</p></td>
|
|
<td><p>1047.3212</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p></p></td>
|
|
<td><p></p></td>
|
|
<td><p>8</p></td>
|
|
<td><p>8862.8164</p></td>
|
|
<td><p>8214.9445</p></td>
|
|
<td><p>5968.7734</p></td>
|
|
<td><p>2813.1531</p></td>
|
|
<td><p>969.817</p></td>
|
|
<td><p>1098.3081</p></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
<p><em>TP stands for Tensor Parallelism</em></p>
|
|
</section>
|
|
<section id="reproducing-benchmarked-results">
|
|
<h2>Reproducing Benchmarked Results<a class="headerlink" href="#reproducing-benchmarked-results" title="Link to this heading"></a></h2>
|
|
<blockquote>
|
|
<div><p><strong>Note:</strong> The only models supported in this workflow are those listed in the table above.</p>
|
|
</div></blockquote>
|
|
<p>The following tables are references for commands that are used as part of the benchmarking process. For a more detailed
|
|
description of this benchmarking workflow, see the <a class="reference external" href="https://nvidia.github.io/TensorRT-LLM/performance/perf-benchmarking.html">benchmarking suite documentation</a>.</p>
|
|
<section id="commands">
|
|
<h3>Commands<a class="headerlink" href="#commands" title="Link to this heading"></a></h3>
|
|
<table class="docutils align-default">
|
|
<thead>
|
|
<tr class="row-odd"><th class="head text-left"><p>Stage</p></th>
|
|
<th class="head"><p>Description</p></th>
|
|
<th class="head"><p>Command</p></th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr class="row-even"><td class="text-left"><p><a class="reference internal" href="#preparing-a-dataset">Dataset</a></p></td>
|
|
<td><p>Create a synthetic dataset</p></td>
|
|
<td><p><code class="docutils literal notranslate"><span class="pre">python</span> <span class="pre">benchmarks/cpp/prepare_dataset.py</span> <span class="pre">--tokenizer=$model_name</span> <span class="pre">--stdout</span> <span class="pre">token-norm-dist</span> <span class="pre">--num-requests=$num_requests</span> <span class="pre">--input-mean=$isl</span> <span class="pre">--output-mean=$osl</span> <span class="pre">--input-stdev=0</span> <span class="pre">--output-stdev=0</span> <span class="pre">></span> <span class="pre">$dataset_file</span></code></p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td class="text-left"><p><a class="reference internal" href="#engine-building">Build</a></p></td>
|
|
<td><p>Build a TensorRT-LLM engine</p></td>
|
|
<td><p><code class="docutils literal notranslate"><span class="pre">trtllm-bench</span> <span class="pre">--model</span> <span class="pre">$model_name</span> <span class="pre">build</span> <span class="pre">--tp_size</span> <span class="pre">$tp_size</span> <span class="pre">--quantization</span> <span class="pre">FP8</span> <span class="pre">--dataset</span> <span class="pre">$dataset_file</span></code></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td class="text-left"><p><a class="reference internal" href="#running-the-benchmark">Run</a></p></td>
|
|
<td><p>Run a benchmark with a dataset</p></td>
|
|
<td><p><code class="docutils literal notranslate"><span class="pre">trtllm-bench</span> <span class="pre">--model</span> <span class="pre">$model_name</span> <span class="pre">throughput</span> <span class="pre">--dataset</span> <span class="pre">$dataset_file</span> <span class="pre">--engine_dir</span> <span class="pre">$engine_dir</span></code></p></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</section>
|
|
<section id="variables">
|
|
<h3>Variables<a class="headerlink" href="#variables" title="Link to this heading"></a></h3>
|
|
<table class="docutils align-default">
|
|
<thead>
|
|
<tr class="row-odd"><th class="head text-left"><p>Name</p></th>
|
|
<th class="head"><p>Description</p></th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr class="row-even"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$isl</span></code></p></td>
|
|
<td><p>Benchmark input sequence length.</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$osl</span></code></p></td>
|
|
<td><p>Benchmark output sequence length.</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$tp_size</span></code></p></td>
|
|
<td><p>Number of GPUs to run the benchmark with</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$engine_dir</span></code></p></td>
|
|
<td><p>Location to store built engine file (can be deleted after running benchmarks).</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$model_name</span></code></p></td>
|
|
<td><p>HuggingFace model name, e.g. meta-llama/Llama-2-7b-hf, or the path to a local weights directory</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$dataset_file</span></code></p></td>
|
|
<td><p>Location of the dataset file generated by <code class="docutils literal notranslate"><span class="pre">prepare_dataset.py</span></code></p></td>
|
|
</tr>
|
|
<tr class="row-even"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$num_requests</span></code></p></td>
|
|
<td><p>The number of requests to generate for dataset generation</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td class="text-left"><p><code class="docutils literal notranslate"><span class="pre">$seq_len</span></code></p></td>
|
|
<td><p>A sequence length of ISL + OSL</p></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</section>
|
|
</section>
|
|
<section id="preparing-a-dataset">
|
|
<h2>Preparing a Dataset<a class="headerlink" href="#preparing-a-dataset" title="Link to this heading"></a></h2>
|
|
<p>In order to prepare a dataset, you can use the provided <a class="reference download internal" download="" href="../_downloads/ea8faa5e98124e92f96b66dc586fb429/prepare_dataset.py"><span class="xref download myst">script</span></a>.
|
|
To generate a synthetic dataset, run the following command:</p>
|
|
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>benchmarks/cpp/prepare_dataset.py<span class="w"> </span>--tokenizer<span class="o">=</span><span class="nv">$model_name</span><span class="w"> </span>--stdout<span class="w"> </span>token-norm-dist<span class="w"> </span>--num-requests<span class="o">=</span><span class="nv">$num_requests</span><span class="w"> </span>--input-mean<span class="o">=</span><span class="nv">$isl</span><span class="w"> </span>--output-mean<span class="o">=</span><span class="nv">$osl</span><span class="w"> </span>--input-stdev<span class="o">=</span><span class="m">0</span><span class="w"> </span>--output-stdev<span class="o">=</span><span class="m">0</span><span class="w"> </span>><span class="w"> </span><span class="nv">$dataset_file</span>
|
|
</pre></div>
|
|
</div>
|
|
<p>The command will generate a text file located at the path specified by <code class="docutils literal notranslate"><span class="pre">$dataset_file</span></code> where all requests are of the same
|
|
input/output sequence length combination. The script works by using the tokenizer to retrieve the vocabulary size and
|
|
randomly sample token IDs from it to create entirely random sequences. In the command above, all requests will be uniform
|
|
because the standard deviations for both input and output sequences are set to 0.</p>
|
|
<p>For each input and output sequence length combination, the table below details the <code class="docutils literal notranslate"><span class="pre">$num_requests</span></code> that were used. For
|
|
shorter input and output lengths, a larger number of requests was used to guarantee that the system hit a steady state
|
|
because requests enter and exit the system at a much faster rate. For longer input/output sequence lengths, requests
|
|
remain in the system longer and therefore require fewer requests to achieve steady state.</p>
|
|
<table class="docutils align-default">
|
|
<thead>
|
|
<tr class="row-odd"><th class="head"><p>Input Length</p></th>
|
|
<th class="head"><p>Output Length</p></th>
|
|
<th class="head"><p>$seq_len</p></th>
|
|
<th class="head"><p>$num_requests</p></th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr class="row-even"><td><p>128</p></td>
|
|
<td><p>128</p></td>
|
|
<td><p>256</p></td>
|
|
<td><p>30000</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p>128</p></td>
|
|
<td><p>2048</p></td>
|
|
<td><p>2176</p></td>
|
|
<td><p>3000</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>128</p></td>
|
|
<td><p>4096</p></td>
|
|
<td><p>4224</p></td>
|
|
<td><p>1500</p></td>
|
|
</tr>
|
|
<tr class="row-odd"><td><p>2048</p></td>
|
|
<td><p>128</p></td>
|
|
<td><p>2176</p></td>
|
|
<td><p>3000</p></td>
|
|
</tr>
|
|
<tr class="row-even"><td><p>2048</p></td>
|
|
<td><p>2048</p></td>
|
|
<td><p>4096</p></td>
|
|
<td><p>1500</p></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</section>
|
|
<section id="engine-building">
|
|
<h2>Engine Building<a class="headerlink" href="#engine-building" title="Link to this heading"></a></h2>
|
|
<p>All engines are built using the <code class="docutils literal notranslate"><span class="pre">trtllm-bench</span> <span class="pre">build</span></code> sub-command. The basic command for FP8 quantized engines is as follows:</p>
|
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>trtllm-bench --model $model_name build --tp_size $tp_size --quantization FP8 --dataset $dataset_file
|
|
</pre></div>
|
|
</div>
|
|
<p>or if you would like to build for a specific sequence length:</p>
|
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>trtllm-bench --model $model_name build --tp_size $tp_size --quantization FP8 --max_seq_length $seq_len
|
|
</pre></div>
|
|
</div>
|
|
<p>If you would like to build an FP16 engine without any quantization, simply remove the <code class="docutils literal notranslate"><span class="pre">--quantization</span> <span class="pre">FP8</span></code> option.</p>
|
|
<blockquote>
|
|
<div><p><strong>Note:</strong> If you specify FP8 quantization, the KV cache will automatically be set to FP8 as well!</p>
|
|
</div></blockquote>
|
|
<p>The <code class="docutils literal notranslate"><span class="pre">trtllm-bench</span> <span class="pre">build</span></code> sub-command will output the path where the engine is located upon a successful build. For example,</p>
|
|
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="o">===========================================================</span>
|
|
ENGINE<span class="w"> </span>SAVED:<span class="w"> </span>/tmp/meta-llama/Llama-2-7b-hf/tp_1_pp_1
|
|
<span class="o">===========================================================</span>
|
|
</pre></div>
|
|
</div>
|
|
</section>
|
|
<section id="running-the-benchmark">
|
|
<h2>Running the Benchmark<a class="headerlink" href="#running-the-benchmark" title="Link to this heading"></a></h2>
|
|
<p>To run the benchmark with the generated data set, simply use the <code class="docutils literal notranslate"><span class="pre">trtllm-bench</span> <span class="pre">throughput</span></code> sub-command. The benchmarker will
|
|
run an offline maximum throughput scenario such that all requests are queued in rapid succession. You simply need to provide
|
|
the path to the engine from the <a class="reference internal" href="#engine-building">build</a> phase and a <a class="reference internal" href="#preparing-a-dataset">generated dataset</a>.</p>
|
|
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>trtllm-bench<span class="w"> </span>--model<span class="w"> </span><span class="nv">$model_name</span><span class="w"> </span>throughput<span class="w"> </span>--dataset<span class="w"> </span><span class="nv">$dataset_file</span><span class="w"> </span>--engine_dir<span class="w"> </span><span class="nv">$engine_dir</span>
|
|
</pre></div>
|
|
</div>
|
|
<p>The results will be printed to the terminal upon benchmark completion. For example,</p>
|
|
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="o">===========================================================</span>
|
|
<span class="o">=</span><span class="w"> </span>ENGINE<span class="w"> </span><span class="nv">DETAILS</span>
|
|
<span class="o">===========================================================</span>
|
|
Model:<span class="w"> </span>meta-llama/Llama-2-7b-hf
|
|
Engine<span class="w"> </span>Directory:<span class="w"> </span>/tmp/meta-llama/Llama-2-7b-hf/tp_1_pp_1
|
|
TensorRT-LLM<span class="w"> </span>Version:<span class="w"> </span><span class="m">0</span>.12.0
|
|
Dtype:<span class="w"> </span>float16
|
|
KV<span class="w"> </span>Cache<span class="w"> </span>Dtype:<span class="w"> </span>FP8
|
|
Quantization:<span class="w"> </span>FP8
|
|
Max<span class="w"> </span>Input<span class="w"> </span>Length:<span class="w"> </span><span class="m">2048</span>
|
|
Max<span class="w"> </span>Sequence<span class="w"> </span>Length:<span class="w"> </span><span class="nv">4098</span>
|
|
|
|
<span class="o">===========================================================</span>
|
|
<span class="o">=</span><span class="w"> </span>WORLD<span class="w"> </span>+<span class="w"> </span>RUNTIME<span class="w"> </span><span class="nv">INFORMATION</span>
|
|
<span class="o">===========================================================</span>
|
|
TP<span class="w"> </span>Size:<span class="w"> </span><span class="m">1</span>
|
|
PP<span class="w"> </span>Size:<span class="w"> </span><span class="m">1</span>
|
|
Max<span class="w"> </span>Runtime<span class="w"> </span>Batch<span class="w"> </span>Size:<span class="w"> </span><span class="m">4096</span>
|
|
Max<span class="w"> </span>Runtime<span class="w"> </span>Tokens:<span class="w"> </span><span class="m">8192</span>
|
|
Scheduling<span class="w"> </span>Policy:<span class="w"> </span>Guaranteed<span class="w"> </span>No<span class="w"> </span>Evict
|
|
KV<span class="w"> </span>Memory<span class="w"> </span>Percentage:<span class="w"> </span><span class="m">99</span>.0%
|
|
Issue<span class="w"> </span>Rate<span class="w"> </span><span class="o">(</span>req/sec<span class="o">)</span>:<span class="w"> </span><span class="m">3</span>.680275266452667e+18
|
|
<span class="o">===========================================================</span>
|
|
<span class="o">=</span><span class="w"> </span><span class="nv">STATISTICS</span>
|
|
<span class="o">===========================================================</span>
|
|
Number<span class="w"> </span>of<span class="w"> </span>requests:<span class="w"> </span><span class="m">3000</span>
|
|
Average<span class="w"> </span>Input<span class="w"> </span>Length<span class="w"> </span><span class="o">(</span>tokens<span class="o">)</span>:<span class="w"> </span><span class="m">128</span>.0
|
|
Average<span class="w"> </span>Output<span class="w"> </span>Length<span class="w"> </span><span class="o">(</span>tokens<span class="o">)</span>:<span class="w"> </span><span class="m">128</span>.0
|
|
Token<span class="w"> </span>Throughput<span class="w"> </span><span class="o">(</span>tokens/sec<span class="o">)</span>:<span class="w"> </span><span class="m">23405</span>.927228471104
|
|
Request<span class="w"> </span>Throughput<span class="w"> </span><span class="o">(</span>req/sec<span class="o">)</span>:<span class="w"> </span><span class="m">182</span>.8588064724305
|
|
Total<span class="w"> </span>Latency<span class="w"> </span><span class="o">(</span>seconds<span class="o">)</span>:<span class="w"> </span><span class="m">16</span>.406100739
|
|
<span class="o">===========================================================</span>
|
|
</pre></div>
|
|
</div>
|
|
<blockquote>
|
|
<div><p><strong>Warning:</strong> In some cases, the benchmarker may not print anything at all. This behavior usually
|
|
means that the benchmark has hit an out of memory issue. Try reducing the KV cache percentage
|
|
using the <code class="docutils literal notranslate"><span class="pre">--kv_cache_free_gpu_mem_fraction</span></code> option to lower the percentage of used memory.</p>
|
|
</div></blockquote>
|
|
</section>
|
|
</section>
|
|
|
|
|
|
</div>
|
|
</div>
|
|
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
|
<a href="../advanced/speculative-decoding.html" class="btn btn-neutral float-left" title="Speculative Sampling" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
|
<a href="perf-benchmarking.html" class="btn btn-neutral float-right" title="TensorRT-LLM Benchmarking" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
|
</div>
|
|
|
|
<hr/>
|
|
|
|
<div role="contentinfo">
|
|
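<!-- Removed a template rendering artifact: an unrendered Jinja2 BlockReference repr was emitted here (likely "{{ super }}" written without the call parentheses in the footer template; confirm in the theme override). -->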
|
|
|
|
<div class="footer">
|
|
<p>
|
|
Copyright © 2024 NVIDIA Corporation
|
|
</p>
|
|
<p>
|
|
<a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/privacy-policy/" target="_blank" rel="noopener"
|
|
data-cms-ai="0">Privacy Policy</a> |
|
|
<a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/" target="_blank" rel="noopener"
|
|
data-cms-ai="0">Manage My Privacy</a> |
|
|
<a class="Link" href="https://www.nvidia.com/en-us/preferences/start/" target="_blank" rel="noopener"
|
|
data-cms-ai="0">Do Not Sell or Share My Data</a> |
|
|
<a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/terms-of-service/" target="_blank"
|
|
rel="noopener" data-cms-ai="0">Terms of Service</a> |
|
|
<a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/accessibility/" target="_blank" rel="noopener"
|
|
data-cms-ai="0">Accessibility</a> |
|
|
<a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/company-policies/" target="_blank"
|
|
rel="noopener" data-cms-ai="0">Corporate Policies</a> |
|
|
<a class="Link" href="https://www.nvidia.com/en-us/product-security/" target="_blank" rel="noopener"
|
|
data-cms-ai="0">Product Security</a> |
|
|
<a class="Link" href="https://www.nvidia.com/en-us/contact/" target="_blank" rel="noopener"
|
|
data-cms-ai="0">Contact</a>
|
|
</p>
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
</footer>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
</div>
|
|
<script>
|
|
jQuery(function () {
|
|
SphinxRtdTheme.Navigation.enable(true);
|
|
});
|
|
</script>
|
|
|
|
</body>
|
|
</html> |