From 82845d2c23d7446e7a6938b8c1b7475f656df337 Mon Sep 17 00:00:00 2001 From: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com> Date: Tue, 20 May 2025 09:23:50 +0000 Subject: [PATCH] Update latest GitHub pages to v0.20.0rc3 --- latest/.buildinfo | 4 + latest/.nojekyll | 0 latest/_cpp_gen/executor.html | 6631 +++--- latest/_cpp_gen/runtime.html | 18546 +++++++--------- .../llm_args.py | 1449 ++ latest/_modules/index.html | 46 +- latest/_modules/tensorrt_llm/builder.html | 46 +- .../tensorrt_llm/disaggregated_params.html | 46 +- .../tensorrt_llm/executor/result.html | 46 +- .../_modules/tensorrt_llm/executor/utils.html | 46 +- latest/_modules/tensorrt_llm/functional.html | 108 +- .../tensorrt_llm/layers/activation.html | 46 +- .../tensorrt_llm/layers/attention.html | 86 +- latest/_modules/tensorrt_llm/layers/cast.html | 46 +- latest/_modules/tensorrt_llm/layers/conv.html | 46 +- .../tensorrt_llm/layers/embedding.html | 46 +- .../_modules/tensorrt_llm/layers/linear.html | 46 +- latest/_modules/tensorrt_llm/layers/mlp.html | 46 +- .../tensorrt_llm/layers/normalization.html | 46 +- .../_modules/tensorrt_llm/layers/pooling.html | 46 +- .../tensorrt_llm/llmapi/build_cache.html | 46 +- latest/_modules/tensorrt_llm/llmapi/llm.html | 83 +- .../tensorrt_llm/llmapi/llm_args.html | 108 +- .../tensorrt_llm/llmapi/mpi_session.html | 53 +- .../tensorrt_llm/models/baichuan/model.html | 46 +- .../tensorrt_llm/models/bert/model.html | 46 +- .../tensorrt_llm/models/bloom/model.html | 46 +- .../tensorrt_llm/models/chatglm/config.html | 46 +- .../tensorrt_llm/models/chatglm/model.html | 46 +- .../tensorrt_llm/models/clip/model.html | 46 +- .../tensorrt_llm/models/cogvlm/config.html | 46 +- .../tensorrt_llm/models/cogvlm/model.html | 46 +- .../tensorrt_llm/models/commandr/model.html | 46 +- .../tensorrt_llm/models/dbrx/config.html | 46 +- .../tensorrt_llm/models/dbrx/model.html | 46 +- .../models/deepseek_v1/model.html | 46 +- .../models/deepseek_v2/model.html | 46 +- .../tensorrt_llm/models/dit/model.html | 48 +- .../tensorrt_llm/models/eagle/model.html | 46 +- .../tensorrt_llm/models/enc_dec/model.html | 46 +- .../tensorrt_llm/models/falcon/config.html | 46 +- .../tensorrt_llm/models/falcon/model.html | 46 +- .../tensorrt_llm/models/gemma/config.html | 46 +- .../tensorrt_llm/models/gemma/model.html | 46 +- .../tensorrt_llm/models/gpt/config.html | 46 +- .../tensorrt_llm/models/gpt/model.html | 46 +- .../tensorrt_llm/models/gptj/config.html | 46 +- .../tensorrt_llm/models/gptj/model.html | 46 +- .../tensorrt_llm/models/gptneox/model.html | 46 +- .../tensorrt_llm/models/llama/config.html | 51 +- .../tensorrt_llm/models/llama/model.html | 46 +- .../tensorrt_llm/models/mamba/model.html | 46 +- .../tensorrt_llm/models/medusa/config.html | 46 +- .../tensorrt_llm/models/medusa/model.html | 46 +- .../tensorrt_llm/models/mllama/model.html | 46 +- .../tensorrt_llm/models/mmdit_sd3/model.html | 46 +- .../tensorrt_llm/models/modeling_utils.html | 46 +- .../tensorrt_llm/models/mpt/model.html | 46 +- .../models/multimodal_encoders/config.html | 46 +- .../models/multimodal_encoders/model.html | 46 +- .../tensorrt_llm/models/opt/model.html | 46 +- .../tensorrt_llm/models/phi/model.html | 46 +- .../tensorrt_llm/models/phi3/model.html | 46 +- .../models/recurrentgemma/model.html | 46 +- .../tensorrt_llm/models/redrafter/model.html | 46 +- .../_modules/tensorrt_llm/plugin/plugin.html | 46 +- .../tensorrt_llm/quantization/mode.html | 46 +- .../quantization/quantize_by_modelopt.html | 46 +- .../runtime/enc_dec_model_runner.html | 49 +- .../tensorrt_llm/runtime/generation.html | 48 +- .../runtime/kv_cache_manager.html | 46 +- .../tensorrt_llm/runtime/model_runner.html | 46 +- .../runtime/model_runner_cpp.html | 55 +- .../runtime/multimodal_model_runner.html | 129 +- .../tensorrt_llm/runtime/session.html | 46 +- .../tensorrt_llm/sampling_params.html | 59 +- latest/_sources/_cpp_gen/executor.rst.txt | 42 +- latest/_sources/_cpp_gen/runtime.rst.txt | 318 +- latest/_sources/advanced/gpt-runtime.md.txt | 53 +- .../_sources/advanced/kv-cache-reuse.md.txt | 4 - .../_sources/advanced/weight-streaming.md.txt | 12 - .../architecture/core-concepts.md.txt | 6 +- ...tice_on_DeepSeek-R1_in_TensorRT-LLM.md.txt | 20 +- ...-R1_Performance_on_NVIDIA_B200_GPUs.md.txt | 266 + latest/_sources/commands/trtllm-serve.rst.txt | 1 + .../genai_perf_client_for_multimodal.rst.txt | 10 + latest/_sources/examples/index.rst.txt | 20 +- .../examples/llm_api_examples.rst.txt | 20 +- .../examples/trtllm_serve_examples.rst.txt | 1 + latest/_sources/index.rst.txt | 1 + .../build-from-source-linux.md.txt | 10 +- .../_sources/installation/grace-hopper.md.txt | 4 +- latest/_sources/installation/linux.md.txt | 34 +- .../_sources/performance/perf-overview.md.txt | 503 +- latest/_sources/quick-start-guide.md.txt | 4 +- latest/_sources/reference/memory.md.txt | 12 +- .../_sources/reference/support-matrix.md.txt | 33 +- latest/_sources/release-notes.md.txt | 123 +- latest/_sources/torch.md.txt | 2 +- latest/advanced/disaggregated-service.html | 46 +- latest/advanced/executor.html | 46 +- latest/advanced/expert-parallelism.html | 46 +- latest/advanced/gpt-attention.html | 46 +- latest/advanced/gpt-runtime.html | 99 +- latest/advanced/graph-rewriting.html | 46 +- latest/advanced/kv-cache-reuse.html | 48 +- latest/advanced/lora.html | 46 +- latest/advanced/speculative-decoding.html | 46 +- latest/advanced/weight-streaming.html | 57 +- latest/architecture/add-model.html | 46 +- latest/architecture/checkpoint.html | 46 +- latest/architecture/core-concepts.html | 52 +- latest/architecture/model-weights-loader.html | 46 +- latest/architecture/overview.html | 48 +- latest/architecture/workflow.html | 46 +- ...actice_on_DeepSeek-R1_in_TensorRT-LLM.html | 66 +- latest/blogs/Falcon180B-H200.html | 46 +- latest/blogs/H100vsA100.html | 46 +- latest/blogs/H200launch.html | 46 +- latest/blogs/XQA-kernel.html | 56 +- latest/blogs/quantization-in-TRT-LLM.html | 46 +- ...ek-R1_Performance_on_NVIDIA_B200_GPUs.html | 1182 + latest/commands/trtllm-build.html | 46 +- latest/commands/trtllm-serve.html | 70 +- .../build-image-to-dockerhub.html | 46 +- latest/dev-on-cloud/dev-on-runpod.html | 46 +- latest/examples/curl_chat_client.html | 46 +- .../curl_chat_client_for_multimodal.html | 110 +- latest/examples/curl_completion_client.html | 46 +- latest/examples/customization.html | 46 +- .../deepseek_r1_reasoning_parser.html | 46 +- latest/examples/genai_perf_client.html | 75 +- .../genai_perf_client_for_multimodal.html | 668 + latest/examples/index.html | 72 +- latest/examples/llm_api_examples.html | 66 +- latest/examples/llm_auto_parallel.html | 52 +- latest/examples/llm_eagle_decoding.html | 58 +- latest/examples/llm_guided_decoding.html | 58 +- latest/examples/llm_inference.html | 58 +- latest/examples/llm_inference_async.html | 58 +- .../llm_inference_async_streaming.html | 58 +- latest/examples/llm_inference_customize.html | 58 +- .../examples/llm_inference_distributed.html | 58 +- latest/examples/llm_inference_kv_events.html | 141 +- latest/examples/llm_logits_processor.html | 58 +- latest/examples/llm_lookahead_decoding.html | 58 +- latest/examples/llm_medusa_decoding.html | 58 +- latest/examples/llm_mgmn_llm_distributed.html | 46 +- latest/examples/llm_mgmn_trtllm_bench.html | 81 +- latest/examples/llm_mgmn_trtllm_serve.html | 82 +- latest/examples/llm_multilora.html | 58 +- latest/examples/llm_quantization.html | 58 +- latest/examples/openai_chat_client.html | 52 +- .../openai_chat_client_for_multimodal.html | 196 +- latest/examples/openai_completion_client.html | 46 +- latest/examples/trtllm_serve_examples.html | 47 +- latest/genindex.html | 468 +- latest/index.html | 56 +- .../installation/build-from-source-linux.html | 54 +- latest/installation/grace-hopper.html | 50 +- latest/installation/linux.html | 71 +- latest/key-features.html | 46 +- latest/llm-api/index.html | 46 +- latest/llm-api/reference.html | 60 +- latest/objects.inv | Bin 145032 -> 138114 bytes latest/overview.html | 46 +- latest/performance/perf-analysis.html | 46 +- latest/performance/perf-benchmarking.html | 46 +- latest/performance/perf-overview.html | 2688 +-- .../benchmarking-default-performance.html | 46 +- .../deciding-model-sharding-strategy.html | 46 +- .../fp8-quantization.html | 46 +- .../performance-tuning-guide/index.html | 46 +- ...ing-max-batch-size-and-max-num-tokens.html | 46 +- .../useful-build-time-flags.html | 46 +- .../useful-runtime-flags.html | 46 +- latest/py-modindex.html | 46 +- .../python-api/tensorrt_llm.functional.html | 56 +- latest/python-api/tensorrt_llm.layers.html | 47 +- latest/python-api/tensorrt_llm.models.html | 46 +- latest/python-api/tensorrt_llm.plugin.html | 46 +- .../python-api/tensorrt_llm.quantization.html | 46 +- latest/python-api/tensorrt_llm.runtime.html | 52 +- latest/quick-start-guide.html | 50 +- latest/reference/memory.html | 58 +- latest/reference/precision.html | 46 +- latest/reference/support-matrix.html | 163 +- latest/reference/troubleshooting.html | 46 +- latest/release-notes.html | 568 +- latest/search.html | 46 +- latest/searchindex.js | 2 +- latest/switcher.json | 23 + latest/torch.html | 48 +- latest/torch/adding_new_model.html | 46 +- latest/torch/arch_overview.html | 46 +- latest/torch/attention.html | 46 +- latest/torch/kv_cache_manager.html | 46 +- latest/torch/scheduler.html | 46 +- 198 files changed, 21632 insertions(+), 20826 deletions(-) create mode 100644 latest/.buildinfo create mode 100644 latest/.nojekyll create mode 100644 latest/_downloads/cba6509356738d5d6b4dcb3b7f52cf39/llm_args.py create mode 100644 latest/_sources/blogs/tech_blog/blog1_Pushing_Latency_Boundaries_Optimizing_DeepSeek-R1_Performance_on_NVIDIA_B200_GPUs.md.txt create mode 100644 latest/_sources/examples/genai_perf_client_for_multimodal.rst.txt create mode 100644 latest/blogs/tech_blog/blog1_Pushing_Latency_Boundaries_Optimizing_DeepSeek-R1_Performance_on_NVIDIA_B200_GPUs.html create mode 100644 latest/examples/genai_perf_client_for_multimodal.html create mode 100644 latest/switcher.json diff --git a/latest/.buildinfo b/latest/.buildinfo new file mode 100644 index 0000000000..40066c9e5f --- /dev/null +++ b/latest/.buildinfo @@ -0,0 +1,4 @@ +# Sphinx build info version 1 +# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. +config: 12c1352bd1428d2c6ac709024163b9d8 +tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/latest/.nojekyll b/latest/.nojekyll new file mode 100644 index 0000000000..e69de29bb2 diff --git a/latest/_cpp_gen/executor.html b/latest/_cpp_gen/executor.html index 33794be605..20c88f06f3 100644 --- a/latest/_cpp_gen/executor.html +++ b/latest/_cpp_gen/executor.html @@ -51,7 +51,7 @@ @@ -63,7 +63,7 @@ - + @@ -330,19 +330,19 @@

Examples

- -
  • The Session
  • -
  • In-flight Batching Support
  • -
  • Know Issues and Future Changes
  • diff --git a/latest/advanced/graph-rewriting.html b/latest/advanced/graph-rewriting.html index edfe0133a0..6cc3b894ad 100644 --- a/latest/advanced/graph-rewriting.html +++ b/latest/advanced/graph-rewriting.html @@ -51,7 +51,7 @@ @@ -63,7 +63,7 @@ - + @@ -330,19 +330,19 @@

    Examples