From 2caf860fe7b07bf993d344c8073a659eb1d7bff4 Mon Sep 17 00:00:00 2001
From: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com>
Date: Wed, 4 Jun 2025 03:35:09 +0000
Subject: [PATCH] Update GitHub pages in root to v0.21.0rc0
---
.buildinfo | 2 +-
_cpp_gen/executor.html | 951 +++++++++++++-
_cpp_gen/runtime.html | 69 +-
.../attention.py | 1081 +++++++++++++++
.../llm_args.py | 631 +++++++--
_images/8x_l20_L40S_node_architecture.png | Bin 0 -> 267638 bytes
_images/tech_blog3_mla_absorb.png | Bin 0 -> 560643 bytes
_modules/index.html | 17 +-
_modules/tensorrt_llm/builder.html | 17 +-
.../tensorrt_llm/disaggregated_params.html | 17 +-
_modules/tensorrt_llm/executor/result.html | 21 +-
_modules/tensorrt_llm/executor/utils.html | 49 +-
_modules/tensorrt_llm/functional.html | 20 +-
_modules/tensorrt_llm/layers/activation.html | 17 +-
_modules/tensorrt_llm/layers/attention.html | 17 +-
_modules/tensorrt_llm/layers/cast.html | 17 +-
_modules/tensorrt_llm/layers/conv.html | 17 +-
_modules/tensorrt_llm/layers/embedding.html | 17 +-
_modules/tensorrt_llm/layers/linear.html | 17 +-
_modules/tensorrt_llm/layers/mlp.html | 17 +-
.../tensorrt_llm/layers/normalization.html | 17 +-
_modules/tensorrt_llm/layers/pooling.html | 17 +-
_modules/tensorrt_llm/llmapi/build_cache.html | 17 +-
_modules/tensorrt_llm/llmapi/llm.html | 112 +-
_modules/tensorrt_llm/llmapi/llm_args.html | 678 ++++++++--
_modules/tensorrt_llm/llmapi/mpi_session.html | 20 +-
.../tensorrt_llm/models/baichuan/model.html | 17 +-
_modules/tensorrt_llm/models/bert/model.html | 17 +-
_modules/tensorrt_llm/models/bloom/model.html | 17 +-
.../tensorrt_llm/models/chatglm/config.html | 17 +-
.../tensorrt_llm/models/chatglm/model.html | 17 +-
_modules/tensorrt_llm/models/clip/model.html | 17 +-
.../tensorrt_llm/models/cogvlm/config.html | 17 +-
.../tensorrt_llm/models/cogvlm/model.html | 17 +-
.../tensorrt_llm/models/commandr/model.html | 17 +-
_modules/tensorrt_llm/models/dbrx/config.html | 17 +-
_modules/tensorrt_llm/models/dbrx/model.html | 17 +-
.../models/deepseek_v1/model.html | 17 +-
.../models/deepseek_v2/model.html | 17 +-
_modules/tensorrt_llm/models/dit/model.html | 17 +-
_modules/tensorrt_llm/models/eagle/model.html | 17 +-
.../tensorrt_llm/models/enc_dec/model.html | 17 +-
.../tensorrt_llm/models/falcon/config.html | 17 +-
.../tensorrt_llm/models/falcon/model.html | 17 +-
.../tensorrt_llm/models/gemma/config.html | 17 +-
_modules/tensorrt_llm/models/gemma/model.html | 17 +-
_modules/tensorrt_llm/models/gpt/config.html | 17 +-
_modules/tensorrt_llm/models/gpt/model.html | 17 +-
_modules/tensorrt_llm/models/gptj/config.html | 17 +-
_modules/tensorrt_llm/models/gptj/model.html | 17 +-
.../tensorrt_llm/models/gptneox/model.html | 17 +-
.../tensorrt_llm/models/llama/config.html | 17 +-
_modules/tensorrt_llm/models/llama/model.html | 17 +-
_modules/tensorrt_llm/models/mamba/model.html | 17 +-
.../tensorrt_llm/models/medusa/config.html | 17 +-
.../tensorrt_llm/models/medusa/model.html | 17 +-
.../tensorrt_llm/models/mllama/model.html | 17 +-
.../tensorrt_llm/models/mmdit_sd3/model.html | 17 +-
.../tensorrt_llm/models/modeling_utils.html | 31 +-
_modules/tensorrt_llm/models/mpt/model.html | 17 +-
.../models/multimodal_encoders/config.html | 17 +-
.../models/multimodal_encoders/model.html | 17 +-
_modules/tensorrt_llm/models/opt/model.html | 17 +-
_modules/tensorrt_llm/models/phi/model.html | 17 +-
_modules/tensorrt_llm/models/phi3/model.html | 17 +-
.../models/recurrentgemma/model.html | 17 +-
.../tensorrt_llm/models/redrafter/model.html | 17 +-
_modules/tensorrt_llm/plugin/plugin.html | 88 +-
_modules/tensorrt_llm/quantization/mode.html | 17 +-
.../quantization/quantize_by_modelopt.html | 17 +-
.../runtime/enc_dec_model_runner.html | 17 +-
_modules/tensorrt_llm/runtime/generation.html | 17 +-
.../runtime/kv_cache_manager.html | 17 +-
.../tensorrt_llm/runtime/model_runner.html | 17 +-
.../runtime/model_runner_cpp.html | 21 +-
.../runtime/multimodal_model_runner.html | 17 +-
_modules/tensorrt_llm/runtime/session.html | 17 +-
_modules/tensorrt_llm/sampling_params.html | 50 +-
_sources/_cpp_gen/executor.rst.txt | 6 +
_sources/advanced/kv-cache-management.md.txt | 75 ++
.../lowprecision-pcie-allreduce.md.txt | 65 +
...tice_on_DeepSeek-R1_in_TensorRT-LLM.md.txt | 44 +-
...-R1_Performance_on_NVIDIA_B200_GPUs.md.txt | 14 +-
...MTP_Implementation_and_Optimization.md.txt | 252 ++++
...Throughput_on_NVIDIA_Blackwell_GPUs.md.txt | 174 +++
_sources/examples/index.rst.txt | 1 +
_sources/examples/llm_api_examples.rst.txt | 1 +
_sources/examples/llm_eagle2_decoding.rst.txt | 8 +
_sources/index.rst.txt | 2 +
_sources/llm-api/reference.rst.txt | 20 +
_sources/performance/perf-benchmarking.md.txt | 3 +-
_sources/performance/perf-overview.md.txt | 9 +-
_sources/torch/attention.md.txt | 2 +-
_sources/torch/kv_cache_manager.md.txt | 2 +
advanced/disaggregated-service.html | 17 +-
advanced/executor.html | 17 +-
advanced/expert-parallelism.html | 23 +-
advanced/gpt-attention.html | 17 +-
advanced/gpt-runtime.html | 17 +-
advanced/graph-rewriting.html | 17 +-
advanced/kv-cache-management.html | 781 +++++++++++
advanced/kv-cache-reuse.html | 23 +-
advanced/lora.html | 17 +-
advanced/lowprecision-pcie-allreduce.html | 725 ++++++++++
advanced/speculative-decoding.html | 17 +-
advanced/weight-streaming.html | 17 +-
architecture/add-model.html | 17 +-
architecture/checkpoint.html | 17 +-
architecture/core-concepts.html | 17 +-
architecture/model-weights-loader.html | 17 +-
architecture/overview.html | 17 +-
architecture/workflow.html | 17 +-
...actice_on_DeepSeek-R1_in_TensorRT-LLM.html | 61 +-
blogs/Falcon180B-H200.html | 17 +-
blogs/H100vsA100.html | 17 +-
blogs/H200launch.html | 17 +-
blogs/XQA-kernel.html | 17 +-
blogs/quantization-in-TRT-LLM.html | 17 +-
...ek-R1_Performance_on_NVIDIA_B200_GPUs.html | 31 +-
...1_MTP_Implementation_and_Optimization.html | 946 +++++++++++++
...1_Throughput_on_NVIDIA_Blackwell_GPUs.html | 904 +++++++++++++
commands/trtllm-build.html | 21 +-
commands/trtllm-serve.html | 21 +-
dev-on-cloud/build-image-to-dockerhub.html | 17 +-
dev-on-cloud/dev-on-runpod.html | 17 +-
examples/curl_chat_client.html | 17 +-
examples/curl_chat_client_for_multimodal.html | 17 +-
examples/curl_completion_client.html | 17 +-
examples/customization.html | 17 +-
examples/deepseek_r1_reasoning_parser.html | 17 +-
examples/genai_perf_client.html | 17 +-
.../genai_perf_client_for_multimodal.html | 17 +-
examples/index.html | 18 +-
examples/llm_api_examples.html | 18 +-
examples/llm_auto_parallel.html | 17 +-
examples/llm_eagle2_decoding.html | 717 ++++++++++
examples/llm_eagle_decoding.html | 105 +-
examples/llm_guided_decoding.html | 17 +-
examples/llm_inference.html | 17 +-
examples/llm_inference_async.html | 17 +-
examples/llm_inference_async_streaming.html | 17 +-
examples/llm_inference_customize.html | 17 +-
examples/llm_inference_distributed.html | 17 +-
examples/llm_inference_kv_events.html | 113 +-
examples/llm_logits_processor.html | 218 +--
examples/llm_lookahead_decoding.html | 17 +-
examples/llm_medusa_decoding.html | 17 +-
examples/llm_mgmn_llm_distributed.html | 17 +-
examples/llm_mgmn_trtllm_bench.html | 54 +-
examples/llm_mgmn_trtllm_serve.html | 17 +-
examples/llm_multilora.html | 17 +-
examples/llm_quantization.html | 17 +-
examples/openai_chat_client.html | 17 +-
.../openai_chat_client_for_multimodal.html | 17 +-
examples/openai_completion_client.html | 17 +-
examples/trtllm_serve_examples.html | 17 +-
genindex.html | 471 ++++++-
index.html | 22 +-
installation/build-from-source-linux.html | 17 +-
installation/grace-hopper.html | 17 +-
installation/linux.html | 17 +-
key-features.html | 17 +-
llm-api/index.html | 17 +-
llm-api/reference.html | 1165 ++++++++++++++++-
objects.inv | Bin 138114 -> 144697 bytes
overview.html | 17 +-
performance/perf-analysis.html | 17 +-
performance/perf-benchmarking.html | 20 +-
performance/perf-overview.html | 25 +-
.../benchmarking-default-performance.html | 17 +-
.../deciding-model-sharding-strategy.html | 17 +-
.../fp8-quantization.html | 17 +-
.../performance-tuning-guide/index.html | 17 +-
...ing-max-batch-size-and-max-num-tokens.html | 17 +-
.../useful-build-time-flags.html | 17 +-
.../useful-runtime-flags.html | 17 +-
py-modindex.html | 17 +-
python-api/tensorrt_llm.functional.html | 23 +-
python-api/tensorrt_llm.layers.html | 17 +-
python-api/tensorrt_llm.models.html | 23 +-
python-api/tensorrt_llm.plugin.html | 17 +-
python-api/tensorrt_llm.quantization.html | 17 +-
python-api/tensorrt_llm.runtime.html | 17 +-
quick-start-guide.html | 17 +-
reference/memory.html | 17 +-
reference/precision.html | 17 +-
reference/support-matrix.html | 17 +-
reference/troubleshooting.html | 17 +-
release-notes.html | 17 +-
search.html | 17 +-
searchindex.js | 2 +-
torch.html | 17 +-
torch/adding_new_model.html | 17 +-
torch/arch_overview.html | 17 +-
torch/attention.html | 19 +-
torch/kv_cache_manager.html | 18 +-
torch/scheduler.html | 17 +-
197 files changed, 12348 insertions(+), 1033 deletions(-)
create mode 100644 _downloads/b509390ba70e52fabb10dbd9d15d5118/attention.py
create mode 100644 _images/8x_l20_L40S_node_architecture.png
create mode 100644 _images/tech_blog3_mla_absorb.png
create mode 100644 _sources/advanced/kv-cache-management.md.txt
create mode 100644 _sources/advanced/lowprecision-pcie-allreduce.md.txt
create mode 100644 _sources/blogs/tech_blog/blog2_DeepSeek_R1_MTP_Implementation_and_Optimization.md.txt
create mode 100644 _sources/blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md.txt
create mode 100644 _sources/examples/llm_eagle2_decoding.rst.txt
create mode 100644 advanced/kv-cache-management.html
create mode 100644 advanced/lowprecision-pcie-allreduce.html
create mode 100644 blogs/tech_blog/blog2_DeepSeek_R1_MTP_Implementation_and_Optimization.html
create mode 100644 blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.html
create mode 100644 examples/llm_eagle2_decoding.html
diff --git a/.buildinfo b/.buildinfo
index 40066c9e5f..e399b071ba 100644
--- a/.buildinfo
+++ b/.buildinfo
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 12c1352bd1428d2c6ac709024163b9d8
+config: 5c850ce0a6f2d0ce79a91d25fbeeb241
tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/_cpp_gen/executor.html b/_cpp_gen/executor.html
index 20c88f06f3..f1700a377d 100644
--- a/_cpp_gen/executor.html
+++ b/_cpp_gen/executor.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -336,6 +336,7 @@
Generate Text Asynchronously
Distributed LLM Generation
Control generated text using logits processor
+Generate Text Using Eagle2 Decoding
Get KV Cache Events
Generate Text Using Lookahead Decoding
Generation with Quantization
@@ -357,6 +358,7 @@
Generate Text Asynchronously
Distributed LLM Generation
Control generated text using logits processor
+Generate Text Using Eagle2 Decoding
Get KV Cache Events
Generate Text Using Lookahead Decoding
Generation with Quantization
@@ -421,6 +423,7 @@
Graph Rewriting Module
Run gpt-2b + LoRA using Executor / cpp runtime
Expert Parallelism in TensorRT-LLM
+KV Cache Management: Pools, Blocks, and Events
KV cache reuse
Speculative Sampling
Disaggregated-Service (experimental)
@@ -455,6 +458,7 @@
Speed up inference with SOTA quantization techniques in TRT-LLM
New XQA-kernel provides 2.4x more Llama-70B throughput within the same latency budget
Pushing Latency Boundaries: Optimizing DeepSeek-R1 Performance on NVIDIA B200 GPUs
+DeepSeek R1 MTP Implementation and Optimization
@@ -1250,6 +1254,553 @@
+
+
+transferAgent.h
+
+-
+namespace tensorrt_llm
+
+-
+namespace executor
+
+-
+namespace kv_cache
+
+
Typedefs
+
+-
+using TransferDescs = MemoryDescs
+
+
+
+-
+using RegisterDescs = MemoryDescs
+
+
+
+-
+using SyncMessage = std::string
+
+
+
+-
+using ConnectionInfoType = std::string
+
+
+
+
+
Enums
+
+-
+enum class MemoryType : uint8_t
+Values:
+
+-
+enumerator kDRAM
+
+
+
+-
+enumerator kVRAM
+
+
+
+-
+enumerator kBLK
+
+
+
+-
+enumerator kOBJ
+
+
+
+-
+enumerator kFILE
+
+
+
+
+
+-
+enum class TransferOp : uint8_t
+Values:
+
+-
+enumerator kREAD
+
+
+
+-
+enumerator kWRITE
+
+
+
+
+
+
+
Functions
+
+-
+template<typename ...Args>
std::unique_ptr<BaseTransferAgent> makeTransferAgent(
+
+
+- std::string const &backend,
+- Args&&... args,
+
+
+)
+
+
+
+
+-
+class AgentDesc
+
+
Public Functions
+
+-
+inline AgentDesc(std::string backendAgentDesc)
+
+
+
+-
+inline std::string const &getBackendAgentDesc() const noexcept
+
+
+
+
+
Private Members
+
+-
+std::string mBackendAgentDesc
+
+
+
+
+
+
+-
+struct BaseAgentConfig
+
+
Public Members
+
+-
+std::string mName
+
+
+
+-
+bool useProgThread
+
+
+
+
+
+
+-
+class BaseTransferAgent
+
+
Public Functions
+
+-
+virtual ~BaseTransferAgent() = default
+
+
+
+-
+virtual void registerMemory(RegisterDescs const &descs) = 0
+
+
+
+-
+virtual void deregisterMemory(RegisterDescs const &descs) = 0
+
+
+
+-
+virtual void loadRemoteAgent(
+
+
+- std::string const &name,
+- AgentDesc const &agentDesc,
+
+
+) = 0
+
+
+
+-
+virtual AgentDesc getLocalAgentDesc() = 0
+
+
+
+-
+virtual void invalidateRemoteAgent(std::string const &name) = 0
+
+
+
+-
+virtual std::unique_ptr<TransferStatus> submitTransferRequests(
+
+
+- TransferRequest const &request,
+
+
+) = 0
+
+
+
+-
+virtual void notifySyncMessage(
+
+
+- std::string const &name,
+- SyncMessage const &syncMessage,
+
+
+) = 0
+
+
+
+-
+virtual std::unordered_map<std::string, std::vector<SyncMessage>> getNotifiedSyncMessages(
+
+
+
+
+) = 0
+
+
+
+-
+virtual ConnectionInfoType getConnectionInfo() = 0
+
+
+
+-
+virtual void connectRemoteAgent(
+
+
+- std::string const &name,
+- ConnectionInfoType const &connectionInfo,
+
+
+) = 0
+
+
+
+-
+virtual bool checkRemoteDescs(
+
+
+- std::string const &name,
+- MemoryDescs const &memoryDescs,
+
+
+) = 0
+
+
+
+
+
+
+-
+class DynLibLoader
+
+
Public Functions
+
+-
+void *getHandle(std::string const &name)
+
+
+
+-
+template<typename FunctionT>
inline FunctionT getFunctionPointer(
+
+
+- std::string const &libName,
+- std::string const &funcName,
+
+
+)
+
+
+
+-
+~DynLibLoader()
+
+
+
+-
+DynLibLoader() = default
+
+
+
+-
+DynLibLoader(DynLibLoader const&) = delete
+
+
+
+-
+DynLibLoader &operator=(DynLibLoader const&) = delete
+
+
+
+
+
Public Static Functions
+
+-
+static DynLibLoader &getInstance()
+
+
+
+
+
Private Members
+
+-
+std::mutex mDllMutex
+
+
+
+-
+std::unordered_map<std::string, void*> mHandlers
+
+
+
+
+
Private Static Functions
+
+-
+static void *dlSym(void *handle, char const *symbol)
+
+
+
+
+
+
+-
+class MemoryDesc
+
+
Public Functions
+
+-
+inline MemoryDesc(
+
+
+- std::vector<char> const &vec,
+- uint32_t deviceId = 0,
+
+
+)
+
+
+
+-
+inline MemoryDesc(void *addr, size_t len, uint32_t deviceId)
+
+
+
+-
+inline MemoryDesc(uintptr_t addr, size_t len, uint32_t deviceId)
+
+
+
+-
+inline uintptr_t getAddr() const noexcept
+
+
+
+-
+inline size_t getLen() const noexcept
+
+
+
+-
+inline uint32_t getDeviceId() const noexcept
+
+
+
+
+
Public Static Functions
+
+-
+static void serialize(MemoryDesc const &memoryDesc, std::ostream &os)
+
+
+
+-
+static MemoryDesc deserialize(std::istream &is)
+
+
+
+-
+static size_t serializedSize(MemoryDesc const &memoryDesc)
+
+
+
+
+
Private Members
+
+-
+uintptr_t mAddr
+
+
+
+-
+size_t mLen
+
+
+
+-
+uint32_t mDeviceId
+
+
+
+
+
+
+-
+class MemoryDescs
+
+
Public Functions
+
+-
+inline MemoryDescs(MemoryType type, std::vector<MemoryDesc> descs)
+
+
+
+-
+inline MemoryType getType() const noexcept
+
+
+
+-
+inline std::vector<MemoryDesc> const &getDescs() const noexcept
+
+
+
+
+
+
+
+-
+class TransferRequest
+
+
Public Functions
+
+-
+inline TransferRequest(
+
+
+- TransferOp op,
+- TransferDescs srcDescs,
+- TransferDescs dstDescs,
+- std::string const &remoteName,
+- std::optional<SyncMessage> syncMessage = std::nullopt,
+
+
+)
+
+
+
+-
+inline TransferOp getOp() const noexcept
+
+
+
+-
+inline TransferDescs const &getSrcDescs() const noexcept
+
+
+
+-
+inline TransferDescs const &getDstDescs() const noexcept
+
+
+
+-
+inline std::string const &getRemoteName() const noexcept
+
+
+
+-
+inline std::optional<SyncMessage> getSyncMessage() const noexcept
+
+
+
+
+
+
+
+-
+class TransferStatus
+
+
Public Functions
+
+-
+virtual ~TransferStatus() = default
+
+
+
+-
+virtual bool isCompleted() const = 0
+
+
+
+-
+virtual void wait() const = 0
+
+
+
+
+
+
+
+
+
+
+
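To make these declarations easier to read together, here is a minimal illustrative sketch of how they could compose. It is not taken from the TensorRT-LLM sources: the include path, backend string, the extra agent-construction argument, and the src/dst convention for kREAD are assumptions; only the signatures listed above are used.

// Illustrative sketch only -- include path, backend string, and extra args are assumptions.
#include <tensorrt_llm/executor/transferAgent.h>
#include <cstddef>
#include <string>

using namespace tensorrt_llm::executor::kv_cache;

void exampleTransfer(void* dstBuf, void* srcRemoteBuf, std::size_t len,
                     std::string const& peerName,
                     std::string const& peerConnectionInfo)
{
    // What each backend expects as extra constructor arguments is not shown above,
    // so passing a BaseAgentConfig pointer here is a guess.
    BaseAgentConfig config{/*mName=*/"localAgent", /*useProgThread=*/true};
    auto agent = makeTransferAgent("ucx", &config);

    // Describe and register the local destination buffer (VRAM on device 0).
    MemoryDesc dstDesc{dstBuf, len, /*deviceId=*/0};
    agent->registerMemory(RegisterDescs{MemoryType::kVRAM, {dstDesc}});

    // Connection info would normally be obtained out of band from the peer's
    // getConnectionInfo().
    agent->connectRemoteAgent(peerName, peerConnectionInfo);

    // Read the peer's buffer into the local one; the src/dst convention for kREAD
    // is an assumption here.
    MemoryDesc srcDesc{srcRemoteBuf, len, /*deviceId=*/0};
    TransferRequest request{TransferOp::kREAD,
                            TransferDescs{MemoryType::kVRAM, {srcDesc}},
                            TransferDescs{MemoryType::kVRAM, {dstDesc}},
                            peerName};
    auto status = agent->submitTransferRequests(request);
    status->wait();
}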
serialization.h
@@ -1514,6 +2065,28 @@
static size_t serializedSize(kv_cache::SocketState const &state)
+
+-
+static kv_cache::AgentState deserializeAgentState(std::istream &is)
+
+
+
+-
+static void serialize(
+
+
+- kv_cache::AgentState const &state,
+- std::ostream &os,
+
+
+)
+
+
+
+-
+static size_t serializedSize(kv_cache::AgentState const &state)
+
+
-
static kv_cache::CacheState deserializeCacheState(std::istream &is)
@@ -2398,6 +2971,34 @@
)
+
+-
+static SpecDecodingStats deserializeSpecDecodingStats(
+
+
+- std::istream &is,
+
+
+)
+
+
+
+-
+static void serialize(
+
+
+- SpecDecodingStats const &specDecStats,
+- std::ostream &os,
+
+
+)
+
+
+
+-
+static size_t serializedSize(SpecDecodingStats const &specDecStats)
+
+
-
static IterationStats deserializeIterationStats(
@@ -2620,8 +3221,8 @@
--
-namespace kv_cache
+-
+namespace kv_cache
@@ -2649,6 +3250,11 @@
using SizeType32 = std::int32_t
+
+-
+using SizeType64 = std::int64_t
+
+
-
using FloatType = float
@@ -3045,6 +3651,31 @@
+
+-
+enum class KvCacheTransferMode
+Enum describing the transfer mode for KV cache.
+Values:
+
+-
+enumerator DRAM
+Copy to/from CPU memory (original approach).
+
+
+
+-
+enumerator GDS
+Attempt GPUDirect Storage (cuFile).
+
+
+
+-
+enumerator POSIX_DEBUG_FALLBACK
+Force a POSIX read/write for debugging.
+
+
+
+
Functions
@@ -3787,6 +4418,12 @@
Stats specific to inflight batching.
+
+-
+std::optional<SpecDecodingStats> specDecStats
+Stats specific to speculative decoding.
+
+
@@ -4133,6 +4770,53 @@
+
+-
+struct SpecDecodingStats
+
+#include <types.h>
+Struct that holds speculative decoding stats.
+
+
Public Members
+
+-
+SizeType64 numDraftTokens
+Total number of proposed draft tokens for all requests.
+
+
+
+-
+SizeType64 numAcceptedTokens
+Total number of accepted draft tokens for all requests.
+
+
+
+-
+SizeType64 numRequestsWithDraftTokens
+Number of requests with at least one draft token in batch.
+
+
+
+-
+double acceptanceLength
+Acceptance length, defined as average number of tokens produced per step for all requests with at least one draft token.
+
+
+
+-
+double iterLatencyMS
+Iteration latency for draft token generation only (ms)
+
+
+
+-
+double draftOverhead
+Draft overhead, defined as iterLatencyMS (specdec) / iterLatencyMS (total)
+
+
+
+
+
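As a small illustration (not part of the generated reference; the helper name is invented), the counters documented above can be combined into a simple draft-token acceptance rate:

// Illustrative helper only: acceptance rate derived from the fields documented above.
double draftAcceptanceRate(SpecDecodingStats const& stats)
{
    return stats.numDraftTokens > 0
        ? static_cast<double>(stats.numAcceptedTokens) / static_cast<double>(stats.numDraftTokens)
        : 0.0;
}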
-
struct StaticBatchingStats
@@ -6909,16 +7593,18 @@
--
-explicit KvCacheRetentionConfig(
+
-
+explicit KvCacheRetentionConfig(
- std::vector<TokenRangeRetentionConfig> const &tokenRangeRetentionPriorities,
- RetentionPriority decodeRetentionPriority = kDefaultRetentionPriority,
- std::optional<std::chrono::milliseconds> decodeDurationMs = std::nullopt,
+- KvCacheTransferMode transferMode = KvCacheTransferMode::DRAM,
+- std::optional<std::string> directory = std::nullopt,
-)
+)
@@ -6946,6 +7632,16 @@
) const
+
+-
+KvCacheTransferMode getTransferMode() const
+
+
+
+-
+std::optional<std::string> getDirectory() const
+
+
-
std::vector<RetentionPriorityAndDuration> getPerBlockRetentionPriorityDuration(
@@ -7003,6 +7699,18 @@
The duration in ms that decode blocks should remain at their assigned priority level.
+
+-
+KvCacheTransferMode mTransferMode
+The transfer mode for the block.
+
+
+
+-
+std::optional<std::string> mDirectory
+Name of the directory if transfer mode is GDS or POSIX_DEBUG_FALLBACK.
+
+
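A minimal sketch of calling the extended constructor shown above (illustrative only; the empty token-range list, the directory path, and referring to kDefaultRetentionPriority through the class scope are assumptions):

// Illustrative only: offload KV blocks via GPUDirect Storage into a placeholder directory.
KvCacheRetentionConfig retentionConfig(
    /*tokenRangeRetentionPriorities=*/{},
    /*decodeRetentionPriority=*/KvCacheRetentionConfig::kDefaultRetentionPriority,
    /*decodeDurationMs=*/std::nullopt,
    /*transferMode=*/KvCacheTransferMode::GDS,
    /*directory=*/std::string{"/tmp/kv_cache_gds"});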
-
@@ -9796,7 +10504,48 @@
-
namespace kv_cache
-
+
+-
+struct AgentState
+
+
Public Functions
+
+-
+inline AgentState(std::string agentName, std::string connectionInfo)
+
+
+
+-
+AgentState() = default
+
+
+
+-
+inline bool operator==(AgentState const &other) const noexcept
+
+
+
+-
+inline std::string toString() const
+
+
+
+
+
Public Members
+
+-
+std::string mAgentName
+
+
+
+-
+std::string mConnectionInfo
+
+
+
+
+
+
-
class CacheState
@@ -10085,6 +10834,18 @@
inline CommState(std::uint16_t port,
std::string ip)
+
+-
+inline explicit CommState(
+
+
+- std::vector<AgentState> agentState,
+- int selfIdx = -1,
+
+
+)
+
+
-
inline bool isMpiState() const noexcept
@@ -10095,6 +10856,11 @@
inline bool isSocketState() const noexcept
+
+-
+inline bool isAgentState() const noexcept
+
+
-
inline MpiState const &getMpiState() const
@@ -10105,6 +10871,11 @@
inline std::vector<SocketState> const &getSocketState() const
+
+-
+inline std::vector<AgentState> const &getAgentState() const
+
+
-
inline int getSelfIdx() const noexcept
@@ -10125,7 +10896,7 @@
Private Members
-
-std::variant<std::monostate, MpiState, std::vector<SocketState>> mState
+std::variant<std::monostate, MpiState, std::vector<SocketState>, std::vector<AgentState>> mState
@@ -10215,11 +10986,6 @@
namespace tensorrt_llm
-
-namespace batch_manager
-
-
-
--
namespace executor
-
@@ -10496,6 +11262,112 @@
tensorrt_llm::runtime
+- transferAgent.h
+
- serialization.h
- types.h
+KvCacheTransferMode
+
operator<<()
operator<<()
tensorrt_llm::executor::DebugTensorsPerIteration
tensorrt_llm::executor::KvCacheStats
+tensorrt_llm::executor::SpecDecodingStats
+
tensorrt_llm::executor::StaticBatchingStats
tensorrt_llm::executor::KvCacheRetentionConfig
+
+
diff --git a/_cpp_gen/runtime.html b/_cpp_gen/runtime.html
index f354db2294..a6ee809136 100644
--- a/_cpp_gen/runtime.html
+++ b/_cpp_gen/runtime.html
@@ -51,7 +51,7 @@
@@ -63,7 +63,7 @@
-
+
@@ -336,6 +336,7 @@
- Generate Text Asynchronously
- Distributed LLM Generation
- Control generated text using logits processor
+- Generate Text Using Eagle2 Decoding
- Get KV Cache Events
- Generate Text Using Lookahead Decoding
- Generation with Quantization
@@ -357,6 +358,7 @@
- Generate Text Asynchronously
- Distributed LLM Generation
- Control generated text using logits processor
+- Generate Text Using Eagle2 Decoding
- Get KV Cache Events
- Generate Text Using Lookahead Decoding
- Generation with Quantization
@@ -421,6 +423,7 @@
- Graph Rewriting Module
- Run gpt-2b + LoRA using Executor / cpp runtime
- Expert Parallelism in TensorRT-LLM
+- KV Cache Management: Pools, Blocks, and Events
- KV cache reuse
- Speculative Sampling
- Disaggregated-Service (experimental)
@@ -455,6 +458,7 @@
- Speed up inference with SOTA quantization techniques in TRT-LLM
- New XQA-kernel provides 2.4x more Llama-70B throughput within the same latency budget
- Pushing Latency Boundaries: Optimizing DeepSeek-R1 Performance on NVIDIA B200 GPUs
+- DeepSeek R1 MTP Implementation and Optimization
@@ -2190,6 +2194,18 @@
) const
+
+-
+inline SizeType32 getFirstLocalLayer(
+
+
+- SizeType32 pipelineParallelism = 1,
+- SizeType32 pipelineParallelismRank = 0,
+
+
+) const
+
+
-
inline SizeType32 countLowerRankLayers(
@@ -2204,8 +2220,15 @@
--
-inline SizeType32 getNbLayers(SizeType32 pipelineParallelism = 1) const
+-
+inline SizeType32 getNbLayers(
+
+
+- SizeType32 pipelineParallelism = 1,
+- SizeType32 pipelineParallelismRank = 0,
+
+
+) const
@@ -11199,6 +11222,19 @@ one more than decoding draft tokens for prediction from primary head
+
+-
+TensorPtr getSequenceLengths(SizeType32 batchIdx) const
+
+- Parameters:
+batchIdx – index of the batch
+
+- Returns:
+[maxBeamWidth], sequence lengths for request batchIdx, on gpu
+
+
+
+
-
TensorPtr getAllNewTokens() const
@@ -11270,6 +11306,11 @@ one more than decoding draft tokens for prediction from primary head
+
+-
+SizeType32 getMaxBatchSize() const
+
+
-
SizeType32 getMaxBeamWidth() const
@@ -11500,6 +11541,11 @@ one more than decoding draft tokens for prediction from primary head
TensorPtr mAllReduceCommPtrs
+
+-
+TensorPtr mFlagPtrs
+
+
-
std::vector<runtime::IpcMemory> mIpcMemoryHandles
@@ -12171,8 +12217,9 @@ one more than decoding draft tokens for prediction from primary head
getVocabSize()
getVocabSizePadded()
countLocalLayers()
+getFirstLocalLayer()
countLowerRankLayers()
-getNbLayers()
+getNbLayers()
getNbAttentionLayers()
getNbRnnLayers()
getNbHeads()
@@ -13526,6 +13573,7 @@ one more than decoding draft tokens for prediction from primary head
getLogProbs()
getLogProbs()
getSequenceLengths()
+getSequenceLengths()
getAllNewTokens()
getNextDraftTokens()
getPrevDraftTokensLengths()
@@ -13533,6 +13581,7 @@ one more than decoding draft tokens for prediction from primary head
getAcceptedLengthsCumSum()
getAcceptedPackedPaths()
getFinishedSteps()
+getMaxBatchSize()
getMaxBeamWidth()
getMaxSequenceLength()
getMaxDecodingDecoderTokens()
@@ -13566,6 +13615,7 @@ one more than decoding draft tokens for prediction from primary head
TensorPtr
AllReduceBuffers()
mAllReduceCommPtrs
+mFlagPtrs
mIpcMemoryHandles
@@ -13717,6 +13767,15 @@ one more than decoding draft tokens for prediction from primary head
+
+
diff --git a/_downloads/b509390ba70e52fabb10dbd9d15d5118/attention.py b/_downloads/b509390ba70e52fabb10dbd9d15d5118/attention.py
new file mode 100644
index 0000000000..32dcea9fff
--- /dev/null
+++ b/_downloads/b509390ba70e52fabb10dbd9d15d5118/attention.py
@@ -0,0 +1,1081 @@
+import math
+import weakref
+from enum import IntEnum
+from typing import Optional, Union, cast
+
+import torch
+from torch import nn
+
+from tensorrt_llm.mapping import Mapping
+
+from ..attention_backend import (AttentionInputType, AttentionMetadata,
+ TrtllmAttention, TrtllmAttentionMetadata)
+from ..attention_backend.interface import (PositionalEmbeddingParams,
+ PredefinedAttentionMask)
+from ..attention_backend.utils import create_attention, get_attention_backend
+from ..distributed import AllReduceParams
+from ..model_config import ModelConfig
+from ..peft.lora.layer import LoraLayer, LoraModuleType
+from ..utils import Fp4QuantizedTensor, get_model_extra_attrs
+from .linear import Linear, TensorParallelMode, WeightMode, WeightsLoadingConfig
+from .multi_stream_utils import maybe_execute_in_parallel
+from .rms_norm import RMSNorm
+from .rotary_embedding import RotaryEmbedding
+
+
+class QkNormType(IntEnum):
+ """
+ The type of QK normalization.
+ """
+ none = 0 # No normalization applied to Q and K
+    pre_rope = 1  # Apply normalization before RoPE
+    post_rope = 2  # Apply normalization after RoPE
+
+
+class Attention(nn.Module):
+
+ def __init__(
+ self,
+ *,
+ hidden_size: int,
+ num_attention_heads: int,
+ num_key_value_heads: int,
+ max_position_embeddings: int,
+ bias: bool,
+ pos_embd_params: Optional[PositionalEmbeddingParams] = None,
+ qk_norm_type: QkNormType = QkNormType.none,
+ layer_idx: Optional[int] = None,
+ dtype: torch.dtype = None,
+ dense_bias: Optional[bool] = None,
+ config: Optional[ModelConfig] = None,
+ q_scaling: float = 1.0,
+ attention_chunk_size: Optional[int] = None,
+ ):
+ """
+ Initialize the Attention module.
+
+ Args:
+ hidden_size (int): The size of the hidden dimension.
+ num_attention_heads (int): The number of attention heads.
+ num_key_value_heads (int): The number of key value heads.
+ max_position_embeddings (int): The maximum position embeddings.
+ bias (bool): Whether to use bias in the linear layers.
+ pos_embd_params (PositionalEmbeddingParams): The positional embedding parameters.
+ qk_norm_type (QkNormType): The type of QK normalization.
+ layer_idx (int): The layer index.
+ dtype (torch.dtype): The data type.
+ dense_bias (bool): Whether to use bias in the output projection layer.
+ config (ModelConfig): The model configuration.
+            q_scaling (float): The scaling factor applied to qk_scale. The definition is $O = softmax(QK^T * qk_scale) * V$, where $qk_scale = 1 / (sqrt(head_dim) * q_scaling)$. The default value is 1.0.
+ attention_chunk_size (int): See [Chunked Attention] below.
+ """
+ super().__init__()
+ self.layer_idx = layer_idx
+
+ config = config or ModelConfig()
+ self.hidden_size = hidden_size
+ self.num_heads = num_attention_heads
+ self.head_dim = getattr(config.pretrained_config, "head_dim",
+ self.hidden_size // self.num_heads)
+ self.num_key_value_heads = num_key_value_heads
+ self.num_key_value_groups = self.num_heads // self.num_key_value_heads
+ self.max_position_embeddings = max_position_embeddings
+ self.pos_embd_params = pos_embd_params
+ self.qk_norm_type = qk_norm_type
+ self.dense_bias = dense_bias
+ self.q_scaling = q_scaling
+
+ # [Chunked Attention]
+ # Chunked attention is applied to context requests only. Chunked attention will be
+ # applied when this field is specified and mMaskType == CAUSAL.
+ #
+ # In chunked attention, we break context requests into chunks of a specified size. Tokens can only
+ # attend to tokens in the same chunk. So, for example, if the chunk size is 3, we might have a mask
+ # that looks like this:
+ #
+ # 1 0 0 0 0 0
+ # 1 1 0 0 0 0
+ # 1 1 1 0 0 0
+ # 0 0 0 1 0 0
+ # 0 0 0 1 1 0
+ # 0 0 0 1 1 1
+ self.attention_chunk_size = attention_chunk_size
+
+ if dense_bias is None:
+ self.dense_bias = bias
+
+ # tensor parallel
+ tp_size = config.mapping.tp_size
+ pp_size = config.mapping.pp_size
+ if config.mapping.enable_attention_dp:
+ tp_size = 1
+
+ mapping = Mapping(
+ world_size=tp_size * pp_size,
+ tp_size=tp_size,
+ pp_size=pp_size,
+ rank=config.mapping.rank,
+ gpus_per_node=config.mapping.gpus_per_node,
+ enable_attention_dp=config.mapping.enable_attention_dp,
+ )
+ assert self.num_heads % tp_size == 0
+ self.num_heads = self.num_heads // tp_size
+ self.num_key_value_heads = (self.num_key_value_heads + tp_size -
+ 1) // tp_size
+ self.q_size = self.num_heads * self.head_dim
+ self.kv_size = self.num_key_value_heads * self.head_dim
+
+ self.qkv_proj = Linear(
+ self.hidden_size,
+ tp_size * self.q_size + 2 * tp_size * self.kv_size,
+ bias=bias,
+ dtype=dtype,
+ mapping=mapping,
+ tensor_parallel_mode=TensorParallelMode.COLUMN,
+ weights_loading_config=WeightsLoadingConfig(
+ weight_mode=WeightMode.FUSED_QKV_LINEAR),
+ quant_config=config.get_quant_config(),
+ skip_create_weights_in_init=config.skip_create_weights_in_init,
+ )
+ self.o_lora = LoraLayer([LoraModuleType.ATTENTION_DENSE],
+ [self.hidden_size])
+
+ self.o_proj = Linear(
+ tp_size * self.q_size,
+ self.hidden_size,
+ bias=self.dense_bias,
+ dtype=dtype,
+ mapping=mapping,
+ tensor_parallel_mode=TensorParallelMode.ROW,
+ quant_config=config.get_quant_config(),
+ skip_create_weights_in_init=config.skip_create_weights_in_init,
+ lora=self.o_lora,
+ )
+
+ self.quant_config = config.get_quant_config()
+ self.attn_backend = config.attn_backend
+ attn_cls = get_attention_backend(self.attn_backend)
+
+ # These two modules are mutually exclusive - either splitted_qkv_lora or fused_qkv_lora will be used,
+ # but never both at the same time. splitted_qkv_lora handles Q,K,V separately while fused_qkv_lora
+ # handles them as a single fused operation.
+ self.splitted_qkv_lora = LoraLayer([
+ LoraModuleType.ATTENTION_Q, LoraModuleType.ATTENTION_K,
+ LoraModuleType.ATTENTION_V
+ ], [self.q_size, self.kv_size, self.kv_size])
+ self.fused_qkv_lora = LoraLayer([LoraModuleType.ATTENTION_QKV],
+ [self.q_size + 2 * self.kv_size])
+
+ self.o_lora = LoraLayer([LoraModuleType.ATTENTION_DENSE],
+ [self.hidden_size])
+
+ # enable_rope_fusion: Whether to fuse RoPE into the attention OP.
+ # If true, RoPE will be applied in self.attn.forward.
+ # If false, RoPE will be applied in self.apply_rope.
+ self.enable_rope_fusion = attn_cls.support_fused_rope(
+ ) and self.qk_norm_type != QkNormType.post_rope
+
+ self.rotary_emb = None
+ if not self.enable_rope_fusion and self.pos_embd_params is not None:
+ self.rotary_emb = RotaryEmbedding(
+ self.pos_embd_params.rope,
+ head_dim=self.head_dim,
+ is_neox=self.pos_embd_params.is_neox,
+ )
+
+ self.attn = create_attention(
+ self.attn_backend,
+ self.layer_idx,
+ self.num_heads,
+ self.head_dim,
+ self.num_key_value_heads,
+ pos_embd_params=self.pos_embd_params
+ if self.enable_rope_fusion else None,
+ quant_config=self.quant_config,
+ skip_create_weights_in_init=config.skip_create_weights_in_init,
+ q_scaling=self.q_scaling,
+ attention_chunk_size=self.attention_chunk_size,
+ )
+
+ self.support_fused_qkv = self.attn.support_fused_qkv()
+
+ if not config.skip_create_weights_in_init:
+ self.create_weights()
+
+ def create_weights(self):
+ # self.attn has no weights but has states that are related to quant_config,
+ # which could be modified after __init__
+ self.attn.update_quant_config(self.quant_config)
+
+ def split_qkv(self, q, k=None, v=None):
+ if k is None and v is None:
+ q, k, v = q.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
+ return q, k, v
+
+ def convert_qkv(self, q, k, v):
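+        # Adapt (q, k, v) to what the attention backend expects: split the fused qkv
+        # tensor when the backend does not accept fused input, or concatenate separate
+        # q/k/v into a single tensor when it does.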
+ if k is None and v is None and not self.support_fused_qkv:
+ q, k, v = self.split_qkv(q)
+ elif k is not None and v is not None and self.support_fused_qkv:
+ qkv = torch.concat([q, k, v], dim=-1)
+ q, k, v = qkv, None, None
+ return q, k, v
+
+ def forward(
+ self,
+ position_ids: Optional[torch.LongTensor],
+ hidden_states: Union[torch.Tensor, Fp4QuantizedTensor],
+ attn_metadata: AttentionMetadata,
+ attention_mask: PredefinedAttentionMask = PredefinedAttentionMask.
+ CAUSAL,
+ mrope_config: Optional[dict] = None,
+ all_reduce_params: Optional[AllReduceParams] = None,
+ lora_params: Optional[dict] = None,
+ attention_window_size: Optional[int] = None,
+ **kwargs,
+ ) -> torch.Tensor:
+ """
+ Forward pass for the Attention module.
+
+ Args:
+ position_ids (Optional[torch.LongTensor]): The position IDs.
+ hidden_states (torch.Tensor): The hidden states.
+ attn_metadata (AttentionMetadata): The attention metadata.
+ attention_mask (PredefinedAttentionMask): The attention mask type.
+ mrope_config (Optional[dict]): The MROPE configuration.
+ all_reduce_params (Optional[AllReduceParams]): The all reduce parameters.
+ lora_params (Optional[dict]): The LoRA parameters.
+ attention_window_size (Optional[int]): The attention window size.
+
+ Returns:
+ torch.Tensor: The output tensor.
+ """
+ qkv = self.qkv_proj(hidden_states)
+
+ if bool(lora_params):
+ qkv_lora = self.splitted_qkv_lora(hidden_states, lora_params,
+ self.layer_idx)
+ if qkv_lora is not None:
+ qkv = qkv + qkv_lora
+
+ qkv_lora = self.fused_qkv_lora(hidden_states, lora_params,
+ self.layer_idx)
+ if qkv_lora is not None:
+ qkv = qkv + qkv_lora
+
+ q, k, v = self.apply_rope(qkv, position_ids)
+
+ out_scale = None
+ if self.o_proj.has_fp8_qdq or self.o_proj.has_nvfp4 or self.o_proj.has_fp8_block_scales:
+ out_scale = self.o_proj.inv_input_scale
+
+ q, k, v = self.convert_qkv(q, k, v)
+ attn_output = self.attn.forward(
+ q,
+ k,
+ v,
+ attn_metadata,
+ out_scale=out_scale,
+ attention_mask=attention_mask,
+ mrope_config=mrope_config,
+ attention_window_size=attention_window_size)
+ hidden_states = attn_output
+ attn_output = self.o_proj(attn_output,
+ all_reduce_params=all_reduce_params,
+ lora_params=lora_params,
+ layer_idx=self.layer_idx)
+ return attn_output
+
+ def apply_qk_norm(self, q, k):
+ raise NotImplementedError(
+ f"QK norm is not implemented for {self.__class__.__name__}."
+ "Please override the `apply_qk_norm` method in the subclass.")
+
+ def apply_rope(self, qkv: torch.Tensor, position_ids: torch.Tensor):
+ """
+ Apply RoPE to the query and key, possibly including QK norm.
+ Args:
+ qkv (torch.Tensor): The query, key, and value tensor.
+ position_ids (torch.Tensor): The position IDs of each token for RoPE.
+ Returns:
+ tuple: A tuple of (q, k, v).
+        This method may be overridden in subclasses; depending on the implementation, k/v may be None and q may be the concatenated qkv tensor.
+        Before self.attn.forward, convert_qkv is called to ensure the format of (q, k, v) meets the requirements of self.attn.
+ """
+ q, k, v = qkv, None, None
+ if self.qk_norm_type == QkNormType.pre_rope:
+ q, k, v = self.split_qkv(q, k, v)
+ q, k = self.apply_qk_norm(q, k)
+ if not self.enable_rope_fusion and position_ids is not None:
+ q, k, v = self.split_qkv(q, k, v)
+ q, k = self.rotary_emb(position_ids, [q, k])
+ if self.qk_norm_type == QkNormType.post_rope:
+ q, k = self.apply_qk_norm(q, k)
+
+ return q, k, v
+
+
+def extract_extra_attrs(layer_idx: str):
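+    # Resolve the weakly-referenced attention metadata and the MLA layer registered
+    # under `layer_idx` in the model's extra attrs (MLA.__init__ populates
+    # extra_attrs["mla_layers"] when a ModelConfig is provided).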
+ extra_attrs = get_model_extra_attrs()
+ assert extra_attrs is not None, "Model extra attrs is not set"
+
+ metadata_ref = extra_attrs.get("attention_metadata", None)
+ assert metadata_ref is not None, "Attention metadata is not set"
+ metadata = metadata_ref()
+ assert isinstance(
+ metadata,
+ TrtllmAttentionMetadata,
+ )
+
+ mla_layers = extra_attrs.get("mla_layers", None)
+    assert mla_layers is not None, "MLA layers are not registered"
+ mla_layer_ref = mla_layers.get(layer_idx, None)
+ assert mla_layer_ref is not None, f"Cannot find MLA layer for layer {layer_idx}"
+ mla_layer = mla_layer_ref()
+ assert isinstance(
+ mla_layer,
+ MLA), "MLA layer must be a subclass of MLA or an instance of MLA"
+
+ return metadata, mla_layer
+
+
+@torch.library.custom_op("trtllm::mla_custom_op", mutates_args=())
+def mla_custom_op(
+ position_ids: Optional[torch.Tensor],
+ hidden_states: torch.Tensor,
+ layer_idx: str,
+) -> torch.Tensor:
+ metadata, mla_layer = extract_extra_attrs(layer_idx)
+
+ return mla_layer.forward_impl(position_ids, hidden_states, metadata)
+
+
+@mla_custom_op.register_fake
+def _(position_ids, hidden_states, layer_idx):
+ _, mla_layer = extract_extra_attrs(layer_idx)
+ return mla_layer.forward_impl_fake(hidden_states)
+
+
+class MLA(nn.Module):
+
+ def __init__(
+ self,
+ *,
+ hidden_size: int,
+ num_attention_heads: int,
+ num_key_value_heads: int,
+ qk_nope_head_dim: int,
+ qk_rope_head_dim: int,
+ v_head_dim: int,
+ q_lora_rank: int,
+ kv_lora_rank: int,
+ predicted_tokens_per_seq: int,
+ max_position_embeddings: int,
+ bias: bool,
+ aux_stream: Optional[torch.cuda.Stream] = None,
+ pos_embd_params: Optional[PositionalEmbeddingParams] = None,
+ layer_idx: Optional[int] = None,
+ dtype: torch.dtype = None,
+ dense_bias: Optional[bool] = None,
+ config: Optional[ModelConfig] = None,
+ ):
+ """
+ Initialize the MLA module.
+
+ Args:
+ hidden_size (int): The size of the hidden dimension.
+ num_attention_heads (int): The number of attention heads.
+ num_key_value_heads (int): The number of key value heads.
+            qk_nope_head_dim (int): The dimension of the non-RoPE part of the query and key.
+            qk_rope_head_dim (int): The dimension of the RoPE part of the query and key.
+ v_head_dim (int): The dimension of the value.
+ q_lora_rank (int): The dimension of the compressed query.
+ kv_lora_rank (int): The dimension of the compressed key and value.
+ predicted_tokens_per_seq (int): The number of predicted tokens per sequence.
+ max_position_embeddings (int): The maximum position embeddings.
+ bias (bool): Whether to use bias in the linear layers.
+ aux_stream (Optional[torch.cuda.Stream]): The auxiliary CUDA stream for running operations in two parallel streams.
+ pos_embd_params (PositionalEmbeddingParams): The positional embedding parameters.
+ layer_idx (int): The layer index.
+ dtype (torch.dtype): The data type.
+ dense_bias (bool): Whether to use bias in the output projection layer.
+ config (ModelConfig): The model configuration.
+ """
+ super().__init__()
+ self.layer_idx = layer_idx
+ self.layer_idx_str = str(layer_idx)
+ self.dtype = dtype
+
+ self.hidden_size = hidden_size
+ self.num_heads = num_attention_heads
+ self.num_key_value_heads = num_key_value_heads
+ self.num_key_value_groups = self.num_heads // self.num_key_value_heads
+ self.qk_nope_head_dim = qk_nope_head_dim
+ self.qk_rope_head_dim = qk_rope_head_dim
+ self.qk_head_dim = qk_nope_head_dim + qk_rope_head_dim
+ self.v_head_dim = v_head_dim
+ self.q_lora_rank = q_lora_rank
+ self.kv_lora_rank = kv_lora_rank
+ self.predicted_tokens_per_seq = predicted_tokens_per_seq
+ self.max_position_embeddings = max_position_embeddings
+ self.pos_embd_params = pos_embd_params
+ self.dense_bias = dense_bias
+ if dense_bias is None:
+ self.dense_bias = bias
+
+ if self.q_lora_rank is None:
+ self.q_lora_rank = hidden_size
+ self.is_lite = True
+ else:
+ self.is_lite = False
+
+ assert pos_embd_params is not None, "pos_embd_params must be provided in MLA"
+
+ self.register_to_config = False
+ if config is not None:
+ if "mla_layers" not in config.extra_attrs:
+ config.extra_attrs["mla_layers"] = {}
+ config.extra_attrs["mla_layers"][self.layer_idx_str] = weakref.ref(
+ self)
+ self.register_to_config = True
+
+ # tensor parallel
+ config = config or ModelConfig()
+ tp_size = config.mapping.tp_size
+ pp_size = config.mapping.pp_size
+ if config.mapping.enable_attention_dp:
+ tp_size = 1
+
+ mapping = Mapping(
+ world_size=tp_size * pp_size,
+ tp_size=tp_size,
+ pp_size=pp_size,
+ rank=config.mapping.rank,
+ gpus_per_node=config.mapping.gpus_per_node,
+ enable_attention_dp=config.mapping.enable_attention_dp,
+ )
+
+ assert self.num_heads % tp_size == 0
+ self.num_heads = self.num_heads // tp_size
+ self.num_key_value_heads = (self.num_key_value_heads + tp_size -
+ 1) // tp_size
+
+ rms_norm_eps = config.pretrained_config.rms_norm_eps
+ quant_config = config.get_quant_config()
+ self.quant_config = quant_config
+
+ if not self.is_lite:
+ self.fused_a = Linear(
+ hidden_size,
+ self.q_lora_rank + self.kv_lora_rank + self.qk_rope_head_dim,
+ bias=bias,
+ dtype=dtype,
+ quant_config=quant_config,
+ skip_create_weights_in_init=config.skip_create_weights_in_init,
+ use_custom_cublas_mm=True)
+
+ self.q_a_layernorm = RMSNorm(hidden_size=self.q_lora_rank,
+ eps=rms_norm_eps,
+ dtype=dtype)
+
+ self.q_b_proj = Linear(
+ self.q_lora_rank,
+ tp_size * self.num_heads * self.qk_head_dim,
+ bias=bias,
+ dtype=dtype,
+ mapping=mapping,
+ tensor_parallel_mode=TensorParallelMode.COLUMN,
+ quant_config=quant_config,
+ skip_create_weights_in_init=config.skip_create_weights_in_init)
+ else:
+ self.fused_a = Linear(
+ hidden_size,
+ self.kv_lora_rank + self.qk_rope_head_dim,
+ bias=bias,
+ dtype=dtype,
+ quant_config=quant_config,
+ skip_create_weights_in_init=config.skip_create_weights_in_init,
+ use_custom_cublas_mm=True)
+
+ self.q_proj = Linear(
+ self.q_lora_rank,
+ tp_size * self.num_heads * self.qk_head_dim,
+ bias=bias,
+ dtype=dtype,
+ mapping=mapping,
+ tensor_parallel_mode=TensorParallelMode.COLUMN,
+ quant_config=quant_config,
+ skip_create_weights_in_init=config.skip_create_weights_in_init,
+ )
+ self.q_b_proj = self.q_proj
+
+ self.kv_a_layernorm = RMSNorm(hidden_size=kv_lora_rank,
+ dtype=dtype,
+ eps=rms_norm_eps)
+
+ self.kv_b_proj = Linear(
+ self.kv_lora_rank,
+ tp_size * self.num_heads *
+ (self.qk_nope_head_dim + self.v_head_dim),
+ bias=bias,
+ dtype=dtype,
+ mapping=mapping,
+ tensor_parallel_mode=TensorParallelMode.COLUMN,
+ quant_config=quant_config,
+ skip_create_weights_in_init=config.skip_create_weights_in_init)
+        # This parameter will be a view into self.kv_b_proj.weight after loading weights.
+        # For dummy weight initialization, this parameter is initialized with an empty tensor.
+        # Used in forward_generation only.
+ self.v_b_proj = nn.Parameter(
+ torch.empty(
+ (self.num_heads, self.v_head_dim, self.kv_lora_rank),
+ dtype=dtype,
+ ),
+ requires_grad=False,
+ )
+
+ self.o_proj = Linear(
+ self.num_key_value_heads * self.v_head_dim * tp_size,
+ self.hidden_size,
+ bias=self.dense_bias,
+ dtype=dtype,
+ mapping=mapping,
+ tensor_parallel_mode=TensorParallelMode.ROW,
+ quant_config=quant_config,
+ skip_create_weights_in_init=config.skip_create_weights_in_init,
+ )
+
+ def yarn_get_mscale(scale=1, mscale=1):
+ if scale <= 1:
+ return 1.0
+ return 0.1 * mscale * math.log(scale) + 1.0
+
+ mscale_all_dim = pos_embd_params.rope.mscale_all_dim
+ scaling_factor = pos_embd_params.rope.scale
+ mscale = yarn_get_mscale(scaling_factor, mscale_all_dim)
+ q_scaling = 1.0 / (mscale * mscale)
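+        # Since the attention op uses qk_scale = 1 / (sqrt(head_dim) * q_scaling)
+        # (see the Attention docstring above), q_scaling = 1 / mscale^2 makes the
+        # effective scale mscale^2 / sqrt(head_dim), applying the YaRN mscale
+        # correction to the softmax scale.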
+
+ self.mha = create_attention(
+ config.attn_backend,
+ self.layer_idx,
+ self.num_heads,
+ head_dim=self.qk_head_dim,
+ num_kv_heads=self.num_key_value_heads,
+ pos_embd_params=pos_embd_params,
+ quant_config=quant_config,
+ q_scaling=q_scaling,
+ is_mla_enable=True,
+ q_lora_rank=self.q_lora_rank,
+ kv_lora_rank=self.kv_lora_rank,
+ qk_nope_head_dim=self.qk_nope_head_dim,
+ qk_rope_head_dim=self.qk_rope_head_dim,
+ v_head_dim=self.v_head_dim,
+ predicted_tokens_per_seq=self.predicted_tokens_per_seq,
+ skip_create_weights_in_init=config.skip_create_weights_in_init,
+ )
+
+ self.mqa = create_attention(
+ config.attn_backend,
+ self.layer_idx,
+ self.num_heads,
+ head_dim=self.kv_lora_rank + self.qk_rope_head_dim,
+ num_kv_heads=1,
+ pos_embd_params=pos_embd_params,
+ quant_config=quant_config,
+ q_scaling=q_scaling,
+ is_mla_enable=True,
+ q_lora_rank=self.q_lora_rank,
+ kv_lora_rank=self.kv_lora_rank,
+ qk_nope_head_dim=self.qk_nope_head_dim,
+ qk_rope_head_dim=self.qk_rope_head_dim,
+ v_head_dim=self.kv_lora_rank,
+ predicted_tokens_per_seq=self.predicted_tokens_per_seq,
+ skip_create_weights_in_init=config.skip_create_weights_in_init,
+ )
+
+ self.aux_stream = aux_stream
+ self.ln_events = [torch.cuda.Event(), torch.cuda.Event()]
+
+ self.enable_rope_fusion = self.mha.support_fused_rope()
+ self.support_fused_qkv = self.mha.support_fused_qkv()
+ self.rotary_emb = RotaryEmbedding(
+ pos_embd_params.rope,
+ head_dim=self.qk_rope_head_dim,
+ is_neox=pos_embd_params.is_neox,
+ )
+ self.apply_rotary_emb = not self.enable_rope_fusion
+
+ if not config.skip_create_weights_in_init:
+ self.create_weights()
+
+ def create_weights(self):
+ # self.mha/mqa has no weights but has states that are related to quant_config,
+ # which could be modified after __init__
+ self.mha.update_quant_config(self.quant_config)
+ self.mqa.update_quant_config(self.quant_config)
+
+ # k_b_proj_trans's dtype must be consistent with self.kv_b_proj,
+ # which can be modified after __init__
+ has_fp8_block_scales = (
+ self.kv_b_proj.quant_config
+ and self.kv_b_proj.quant_config.quant_mode.has_fp8_block_scales())
+
+ mla_weight_dtype = torch.float8_e4m3fn if has_fp8_block_scales else self.dtype
+ self.k_b_proj_trans = nn.Parameter(
+ torch.empty(
+ (self.num_heads, self.kv_lora_rank, self.qk_nope_head_dim),
+ dtype=mla_weight_dtype,
+ ),
+ requires_grad=False,
+ )
+
+ if has_fp8_block_scales:
+ self.k_b_proj_trans_scale = nn.Parameter(
+ torch.empty(
+ (
+ self.num_heads,
+ self.kv_lora_rank // 128,
+ self.qk_nope_head_dim // 128,
+ ),
+ dtype=torch.float32,
+ ),
+ requires_grad=False,
+ )
+            # This parameter will be a view into self.kv_b_proj.weight_scale after loading weights.
+            # For dummy weight initialization, this parameter is initialized with an empty tensor.
+ self.v_b_proj_scale = nn.Parameter(
+ torch.empty(
+ (
+ self.num_heads,
+ self.v_head_dim // 128,
+ self.kv_lora_rank // 128,
+ ),
+ dtype=torch.float32,
+ ),
+ requires_grad=False,
+ )
+ else:
+ self.k_b_proj_trans_scale = None
+ self.v_b_proj_scale = None
+
+ def apply_rope(
+ self,
+ q: torch.Tensor,
+ k_pe: torch.Tensor,
+ position_ids: torch.Tensor,
+ ) -> torch.Tensor:
+ q = q.view(-1, self.num_heads, self.qk_head_dim)
+ q_pe = q[..., self.qk_nope_head_dim:].reshape(
+ -1, self.num_heads * self.qk_rope_head_dim)
+ q_pe, k_pe = self.rotary_emb(position_ids, [q_pe, k_pe])
+ q[..., self.qk_nope_head_dim:] = q_pe.view(-1, self.num_heads,
+ self.qk_rope_head_dim)
+ return k_pe
+
+ def forward_impl_fake(self, hidden_states: torch.Tensor):
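+        # Shape-only "fake" implementation used when tracing trtllm::mla_custom_op
+        # (registered via @mla_custom_op.register_fake above); only the output shape
+        # and dtype matter here.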
+ num_tokens = hidden_states.shape[0]
+ hidden_size = self.o_proj.in_features
+ return hidden_states.new_empty([num_tokens, hidden_size],
+ dtype=hidden_states.dtype)
+
+ def forward_impl(
+ self,
+ position_ids: Optional[torch.Tensor],
+ hidden_states: torch.Tensor,
+ attn_metadata: AttentionMetadata,
+ ) -> torch.Tensor:
+ """
+ Forward pass for the MLA module.
+
+ Args:
+            position_ids (Optional[torch.Tensor]): The position IDs.
+            hidden_states (torch.Tensor): The hidden states.
+            attn_metadata (AttentionMetadata): The attention metadata.
+
+ Returns:
+ torch.Tensor: The output tensor.
+ """
+ if self.is_lite:
+ compressed_kv, k_pe = self.fused_a(hidden_states).split(
+ [self.kv_lora_rank, self.qk_rope_head_dim], -1)
+ compressed_kv = self.kv_a_layernorm(compressed_kv)
+ q = hidden_states
+ else:
+ q, compressed_kv, k_pe = self.fused_a(hidden_states).split(
+ [self.q_lora_rank, self.kv_lora_rank, self.qk_rope_head_dim],
+ -1)
+
+ q, compressed_kv = maybe_execute_in_parallel(
+ lambda: self.q_a_layernorm(q),
+ lambda: self.kv_a_layernorm(compressed_kv),
+ self.ln_events[0],
+ self.ln_events[1],
+ self.aux_stream,
+ )
+
+ q, latent_cache = maybe_execute_in_parallel(
+ lambda: self.q_b_proj(q),
+ lambda: torch.concat([compressed_kv, k_pe], dim=-1),
+ self.ln_events[0],
+ self.ln_events[1],
+ self.aux_stream,
+ )
+
+ # split q, k, v into context and gen batches
+ num_contexts = attn_metadata.num_contexts
+ num_generations = attn_metadata.num_generations
+ num_ctx_tokens = attn_metadata.num_ctx_tokens
+ num_tokens = attn_metadata.num_tokens
+
+ assert q.shape[
+ 0] == num_tokens, f"Expect q.shape[0] to be {num_tokens}, but got {q.shape[0]}"
+
+ if num_contexts > 0:
+ q_ctx = q[:num_ctx_tokens, ...]
+ compressed_kv_ctx = compressed_kv[:num_ctx_tokens, ...]
+ k_pe_ctx = k_pe[:num_ctx_tokens, ...]
+ latent_cache_ctx = latent_cache[:num_ctx_tokens, ...]
+ if self.apply_rotary_emb:
+ assert position_ids is not None
+ k_pe_ctx = self.apply_rope(q_ctx, k_pe_ctx, position_ids)
+
+ attn_output_context = self.forward_context(q_ctx, compressed_kv_ctx,
+ k_pe_ctx, attn_metadata,
+ latent_cache_ctx,
+ position_ids)
+ else:
+ attn_output_context = None
+
+ if num_generations > 0:
+ q_gen = q[num_ctx_tokens:, ...]
+ compressed_kv_gen = compressed_kv[num_ctx_tokens:, ...]
+ k_pe_gen = k_pe[num_ctx_tokens:, ...]
+ latent_cache_gen = latent_cache[num_ctx_tokens:, ...]
+ if self.apply_rotary_emb:
+ assert position_ids is not None
+ k_pe_gen = self.apply_rope(q_gen, k_pe_gen, position_ids)
+
+ attn_output_gen = self.forward_generation(q_gen, compressed_kv_gen,
+ k_pe_gen, attn_metadata,
+ latent_cache_gen)
+ else:
+ attn_output_gen = None
+
+ # release pytorch activation memory
+ q = None
+ compressed_kv = None
+ k_pe = None
+
+ # merge context and gen batches
+ if attn_output_context is not None and attn_output_gen is not None:
+ assert (
+ len(attn_output_context.shape) == 2
+ ), f"attn_output_context must be rank 2, not {len(attn_output_context.shape)}"
+ assert (
+ len(attn_output_gen.shape) == 2
+ ), f"attn_output_gen must be rank 2, not {len(attn_output_gen.shape)}"
+ attn_output = torch.cat([attn_output_context, attn_output_gen],
+ dim=0)
+ # release pytorch activation memory
+ attn_output_context = None
+ attn_output_gen = None
+ elif attn_output_gen is None:
+ attn_output = attn_output_context
+ else:
+ attn_output = attn_output_gen
+
+ return attn_output
+
+ def _maybe_concat_qkv(self, q, k, v):
+ if k is not None and v is not None and self.support_fused_qkv:
+ qkv = torch.concat([q, k, v], dim=-1)
+ q, k, v = qkv, None, None
+ return q, k, v
+
+ def forward_context_default(
+ self,
+ q: torch.Tensor,
+ compressed_kv: torch.Tensor,
+ k_pe: torch.Tensor,
+ attn_metadata: AttentionMetadata,
+ latent_cache: Optional[torch.Tensor] = None,
+ ) -> torch.Tensor:
+ kv = self.kv_b_proj(compressed_kv)
+ k_nope, v = kv.split(
+ [
+ self.num_heads * self.qk_nope_head_dim,
+ self.num_heads * self.v_head_dim
+ ],
+ -1,
+ )
+
+ k = torch.empty_like(q).view(-1, self.num_heads, self.qk_head_dim)
+ k[..., :self.qk_nope_head_dim] = k_nope.view(-1, self.num_heads,
+ self.qk_nope_head_dim)
+ if self.apply_rotary_emb:
+ k[..., self.qk_nope_head_dim:] = k_pe.view(-1, 1,
+ self.qk_rope_head_dim)
+ k = k.view(-1, self.num_heads * self.qk_head_dim)
+
+        # May concatenate q (including q_pe), k + k_pe, and v together
+ q, k, v = self._maybe_concat_qkv(q, k, v)
+
+ # out_scale = getattr(self.o_proj, "inv_input_scale", None)
+ out_scale = None # Currently we use BF16 MHA for context phase
+
+ attn_output = self.mha.forward(
+ q,
+ k,
+ v,
+ attn_metadata,
+ attention_input_type=AttentionInputType.context_only,
+ latent_cache=latent_cache,
+ out_scale=out_scale,
+ )
+
+ return attn_output
+
+ def forward_context_with_cached_kv(
+ self,
+ q: torch.Tensor,
+ compressed_kv: torch.Tensor,
+ k_pe: torch.Tensor,
+ attn_metadata: AttentionMetadata,
+ position_ids: Optional[torch.LongTensor] = None,
+ ) -> torch.Tensor:
+ trtllm_attention = cast(TrtllmAttention, self.mha)
+ # split current q into q_nope and q_pe
+ q_nope, q_pe = q.view([
+ -1, self.num_heads, self.qk_nope_head_dim + self.qk_rope_head_dim
+ ]).split([self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1)
+
+ # apply rope to current q_pe and k_pe
+ assert position_ids is not None
+ assert position_ids.dim() == 1 or (position_ids.dim() == 2
+ and position_ids.shape[0] == 1)
+ assert self.rotary_emb is not None
+ assert self.rotary_emb.head_dim == self.qk_rope_head_dim
+ assert q_pe.shape[0] == k_pe.shape[0]
+ q_pe = q_pe.contiguous().view(-1,
+ self.num_heads * self.qk_rope_head_dim)
+ q_pe, k_pe = self.rotary_emb(
+ position_ids[..., :attn_metadata.num_ctx_tokens], [q_pe, k_pe])
+ k_pe = k_pe.contiguous()
+
+ # build q for attention op
+ q_view = q.view(-1, self.num_heads,
+ self.qk_nope_head_dim + self.qk_rope_head_dim)
+ q_view[:, :,
+ self.qk_nope_head_dim:] = q_pe.view(-1, self.num_heads,
+ self.qk_rope_head_dim)
+ q = q_view.view(
+ -1,
+ self.num_heads * (self.qk_nope_head_dim + self.qk_rope_head_dim))
+ assert q.is_contiguous()
+
+ # append paged kv cache for mla
+ trtllm_attention.append_paged_kv_cache_for_mla(
+ compressed_kv,
+ k_pe,
+ attn_metadata,
+ )
+
+ # copy full_compressed_kv and full_k_pe from paged kv cache
+ full_compressed_kv, full_k_pe = trtllm_attention.load_paged_kv_cache_for_mla(
+ attn_metadata, q.dtype)
+ assert full_compressed_kv.shape[
+ 0] == attn_metadata.num_ctx_cached_tokens + attn_metadata.num_ctx_tokens
+ assert full_compressed_kv.shape[1] == self.kv_lora_rank
+ assert full_k_pe.shape[
+ 0] == attn_metadata.num_ctx_cached_tokens + attn_metadata.num_ctx_tokens
+ assert full_k_pe.shape[1] == self.qk_rope_head_dim
+ assert full_compressed_kv.is_contiguous()
+ assert full_k_pe.is_contiguous()
+
+ # compute full_k_nope and full_v from full_compressed_kv
+ full_kv = self.kv_b_proj(full_compressed_kv)
+ full_k_nope, full_v = full_kv.split(
+ [
+ self.num_heads * self.qk_nope_head_dim,
+ self.num_heads * self.v_head_dim
+ ],
+ -1,
+ )
+ full_k_nope = full_k_nope.view(-1, self.num_heads,
+ self.qk_nope_head_dim)
+ full_v = full_v.view(-1, self.num_heads, self.v_head_dim)
+
+ # build full_k and full_v
+ tokens_per_block = attn_metadata.kv_cache_manager.tokens_per_block
+ # paged kv cache should be initialized to 0 to avoid NaN
+ paged_full_kv = torch.zeros([
+ attn_metadata.num_contexts, 2,
+ (attn_metadata.max_ctx_kv_len + tokens_per_block - 1) //
+ tokens_per_block, self.num_heads, tokens_per_block,
+ max(self.qk_nope_head_dim + self.qk_rope_head_dim, self.v_head_dim)
+ ],
+ dtype=q.dtype,
+ device=q.device)
+ mla_context_kv_cache_block_offsets = trtllm_attention.set_paged_kv_cache_for_mla(
+ paged_full_kv,
+ full_k_nope,
+ full_v,
+ full_k_pe,
+ attn_metadata,
+ )
+
+ # out_scale = getattr(self.o_proj, "inv_input_scale", None)
+ out_scale = None # Currently we use BF16 MHA for context phase
+
+ attn_output = self.mha.forward(
+ q,
+ None,
+ None,
+ attn_metadata,
+ attention_input_type=AttentionInputType.context_only,
+ latent_cache=None,
+ out_scale=out_scale,
+ mla_context_paged_kv=paged_full_kv,
+ mla_context_kv_cache_block_offsets=
+ mla_context_kv_cache_block_offsets,
+ )
+
+ return attn_output
+
+ def forward_context(
+ self,
+ q: torch.Tensor,
+ compressed_kv: torch.Tensor,
+ k_pe: torch.Tensor,
+ attn_metadata: AttentionMetadata,
+ latent_cache: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ ) -> torch.Tensor:
+ if isinstance(self.mha, TrtllmAttention):
+ assert isinstance(attn_metadata, TrtllmAttentionMetadata)
+ trtllm_attention = cast(TrtllmAttention, self.mha)
+ if trtllm_attention.has_cached_kv_for_mla_context(attn_metadata):
+ return self.forward_context_with_cached_kv(
+ q, compressed_kv, k_pe, attn_metadata, position_ids)
+ return self.forward_context_default(q, compressed_kv, k_pe,
+ attn_metadata, latent_cache)
+
+ def forward_generation(
+ self,
+ q: torch.Tensor,
+ compressed_kv: torch.Tensor,
+ k_pe: torch.Tensor,
+ attn_metadata: AttentionMetadata,
+ latent_cache: Optional[torch.Tensor] = None,
+ ) -> torch.Tensor:
+ num_tokens = q.shape[0]
+ q_nope, q_pe = q.view([-1, self.num_heads, self.qk_head_dim]).split(
+ [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1)
+
+        # fused_q contains 1) the result of the following bmm, with shape [num_tokens, num_heads, kv_lora_rank],
+        # and 2) rope(q_pe), with shape [num_tokens, num_heads, qk_rope_head_dim]. RoPE is applied inside AttentionOp.
+ fused_q = torch.empty(
+ [
+ num_tokens, self.num_heads,
+ (self.kv_lora_rank + self.qk_rope_head_dim)
+ ],
+ dtype=q.dtype,
+ device=q.device,
+ )
+
+ if self.k_b_proj_trans.dtype == torch.bfloat16:
+ # [num_heads, num_tokens, self.qk_nope_head_dim]
+ q_nope_t = q_nope.transpose(0, 1)
+ # [num_heads, num_tokens, self.kv_lora_rank]
+ q_nope_out = fused_q[..., :self.kv_lora_rank].transpose(0, 1)
+
+ # [num_heads, num_tokens, self.qk_nope_head_dim] x [num_heads, kv_lora_rank, qk_nope_head_dim]
+ # -> [num_heads, num_tokens, kv_lora_rank] -> [num_tokens, num_heads, kv_lora_rank]
+ # The output of bmm is written directly into fused_q
+ torch.ops.trtllm.bmm_out(q_nope_t,
+ self.k_b_proj_trans.transpose(1, 2),
+ q_nope_out)
+ elif self.k_b_proj_trans.dtype == torch.float8_e4m3fn:
+ q_nope_fp8, q_nope_scales = torch.ops.trtllm.fp8_batched_quantize_1x128_permute102(
+ q_nope)
+ # [num_heads, num_tokens, self.kv_lora_rank]
+ q_nope_out = fused_q[..., :self.kv_lora_rank].transpose(0, 1)
+
+ torch.ops.trtllm.fp8_block_scaling_bmm_out(
+ q_nope_fp8, self.k_b_proj_trans, q_nope_scales,
+ self.k_b_proj_trans_scale, q_nope_out)
+ q_nope_scales = None
+ else:
+ raise NotImplementedError(
+ f"Missing bmm impl for dtype: {self.k_b_proj_trans.dtype}.")
+
+ if self.apply_rotary_emb:
+ fused_q[..., self.kv_lora_rank:] = q_pe
+ fused_q = fused_q.view([
+ num_tokens,
+ self.num_heads * (self.kv_lora_rank + self.qk_rope_head_dim)
+ ])
+
+ # out_scale = getattr(self.o_proj, "inv_input_scale", None)
+ out_scale = None # Although we use FP8 MLA for generation phase, the output is still in BF16
+
+ attn_out_latent = self.mqa.forward(
+ fused_q,
+ None,
+ None,
+ attn_metadata,
+ attention_input_type=AttentionInputType.generation_only,
+ out_scale=out_scale,
+ latent_cache=latent_cache, # kvcache and k_pe
+ q_pe=q_pe, # used by `invokeMLARopeGeneration`
+ )
+ fused_q = None
+
+ assert (attn_out_latent.shape[0] == q.shape[0] and
+ attn_out_latent.shape[1] == self.num_heads * self.kv_lora_rank)
+
+ # [seq, num_heads, kv_lora_rank]
+ attn_out_latent = attn_out_latent.view(
+ [-1, self.num_heads, self.kv_lora_rank])
+
+ attn_output = torch.empty([num_tokens, self.num_heads, self.v_head_dim],
+ dtype=attn_out_latent.dtype,
+ device=attn_out_latent.device)
+
+ if self.v_b_proj.dtype == torch.bfloat16:
+ # [num_heads, seq, kv_lora_rank] x [num_heads, kv_lora_rank, v_head_dim]
+ # -> [num_heads, seq, v_head_dim]
+ torch.ops.trtllm.bmm_out(attn_out_latent.transpose(0, 1),
+ self.v_b_proj.transpose(1, 2),
+ attn_output.transpose(0, 1))
+ elif self.v_b_proj.dtype == torch.float8_e4m3fn:
+ attn_out_latent, attn_out_latent_scales = torch.ops.trtllm.fp8_batched_quantize_1x128_permute102(
+ attn_out_latent)
+
+ torch.ops.trtllm.fp8_block_scaling_bmm_out(
+ attn_out_latent, self.v_b_proj, attn_out_latent_scales,
+ self.v_b_proj_scale, attn_output.transpose(0, 1))
+ attn_out_latent_scales = None
+ else:
+ raise NotImplementedError(
+ f"Missing bmm impl for dtype: {self.v_b_proj.dtype}.")
+
+ # [seq, num_heads * v_head_dim]
+ return attn_output.flatten(1, 2)
+
+ def forward(
+ self,
+ position_ids: Optional[torch.Tensor],
+ hidden_states: torch.Tensor,
+ attn_metadata: AttentionMetadata,
+ all_reduce_params: Optional[AllReduceParams] = None,
+ ) -> torch.Tensor:
+ if self.register_to_config:
+ attn_output = torch.ops.trtllm.mla_custom_op(
+ position_ids, hidden_states, self.layer_idx_str)
+ else:
+ attn_output = self.forward_impl(position_ids, hidden_states,
+ attn_metadata)
+ attn_output = self.o_proj(attn_output,
+ all_reduce_params=all_reduce_params)
+ return attn_output
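+
+
+# A shape-level sketch of the weight-absorption trick used in forward_generation
+# above, written with plain torch.bmm in place of the fused
+# torch.ops.trtllm.bmm_out / FP8 block-scaling kernels. The helper name
+# _mla_weight_absorption_sketch and the default sizes are illustrative only.
+def _mla_weight_absorption_sketch(num_tokens: int = 4,
+                                  num_heads: int = 8,
+                                  qk_nope_head_dim: int = 128,
+                                  kv_lora_rank: int = 512,
+                                  v_head_dim: int = 128) -> torch.Tensor:
+    q_nope = torch.randn(num_tokens, num_heads, qk_nope_head_dim)
+    k_b_proj_trans = torch.randn(num_heads, kv_lora_rank, qk_nope_head_dim)
+    v_b_proj = torch.randn(num_heads, v_head_dim, kv_lora_rank)
+
+    # Absorb k_b_proj into the query: [num_heads, num_tokens, kv_lora_rank]
+    q_latent = torch.bmm(q_nope.transpose(0, 1), k_b_proj_trans.transpose(1, 2))
+
+    # The MQA over the compressed KV cache runs in this latent space and returns
+    # a tensor of the same shape; reuse q_latent here as a stand-in for it.
+    attn_out_latent = q_latent
+
+    # Project back to the per-head value dim: [num_tokens, num_heads * v_head_dim]
+    attn_output = torch.bmm(attn_out_latent, v_b_proj.transpose(1, 2))
+    return attn_output.transpose(0, 1).flatten(1, 2)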
diff --git a/_downloads/cba6509356738d5d6b4dcb3b7f52cf39/llm_args.py b/_downloads/cba6509356738d5d6b4dcb3b7f52cf39/llm_args.py
index a60766d789..0835058eda 100644
--- a/_downloads/cba6509356738d5d6b4dcb3b7f52cf39/llm_args.py
+++ b/_downloads/cba6509356738d5d6b4dcb3b7f52cf39/llm_args.py
@@ -1,14 +1,17 @@
import json
import math
+import os
from abc import ABC, abstractmethod
-from dataclasses import dataclass, field, fields
+from dataclasses import dataclass, field
from enum import Enum, EnumMeta
from pathlib import Path
-from typing import Any, ClassVar, Dict, List, Literal, Optional, Union
+from typing import (TYPE_CHECKING, Any, ClassVar, Dict, List, Literal, Optional,
+ Union)
import torch
import yaml
-from pydantic import BaseModel, Field, validator
+from pydantic import (BaseModel, Field, PrivateAttr, field_validator,
+ model_validator)
from strenum import StrEnum
from transformers import PreTrainedTokenizerBase
@@ -17,23 +20,30 @@ from tensorrt_llm.lora_manager import (LoraConfig,
from .._utils import mpi_rank
from ..auto_parallel import AutoParallelConfig, infer_cluster_config
+
+if TYPE_CHECKING:
+ from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig
+
# yapf: disable
-from ..bindings.executor import BatchingType as _BatchingType
-from ..bindings.executor import \
- CacheTransceiverConfig as _CacheTransceiverConfig
-from ..bindings.executor import \
- CapacitySchedulerPolicy as _CapacitySchedulerPolicy
-from ..bindings.executor import ContextChunkingPolicy as _ContextChunkingPolicy
-from ..bindings.executor import DecodingConfig, DecodingMode
-from ..bindings.executor import DynamicBatchConfig as _DynamicBatchConfig
-from ..bindings.executor import EagleConfig, ExecutorConfig
-from ..bindings.executor import \
- ExtendedRuntimePerfKnobConfig as _ExtendedRuntimePerfKnobConfig
-from ..bindings.executor import KvCacheConfig as _KvCacheConfig
-from ..bindings.executor import \
- LookaheadDecodingConfig as _LookaheadDecodingConfig
-from ..bindings.executor import PeftCacheConfig as _PeftCacheConfig
-from ..bindings.executor import SchedulerConfig as _SchedulerConfig
+# isort: off
+from ..bindings.executor import (
+ BatchingType as _BatchingType,
+ CacheTransceiverConfig as _CacheTransceiverConfig,
+ CapacitySchedulerPolicy as _CapacitySchedulerPolicy,
+ ContextChunkingPolicy as _ContextChunkingPolicy,
+ DecodingConfig,
+ DecodingMode,
+ DynamicBatchConfig as _DynamicBatchConfig,
+ EagleConfig as _EagleConfig,
+ ExecutorConfig as _ExecutorConfig,
+ ExtendedRuntimePerfKnobConfig as _ExtendedRuntimePerfKnobConfig,
+ KvCacheConfig as _KvCacheConfig,
+ LookaheadDecodingConfig as _LookaheadDecodingConfig,
+ PeftCacheConfig as _PeftCacheConfig,
+ SchedulerConfig as _SchedulerConfig) # isort: skip
+# isort: on
+from transformers import PreTrainedTokenizerBase
+
# yapf: enable
from ..builder import BuildConfig, EngineConfig
from ..logger import logger
@@ -195,7 +205,8 @@ class DecodingBaseConfig(BaseModel):
"MTP": MTPDecodingConfig,
"Medusa": MedusaDecodingConfig,
"Eagle": EagleDecodingConfig,
- "Lookahead": LookaheadDecodingConfig
+ "Lookahead": LookaheadDecodingConfig,
+ "NGram": NGramDecodingConfig,
}
config_class = config_classes.get(decoding_type)
@@ -228,6 +239,7 @@ class EagleDecodingConfig(DecodingBaseConfig):
num_eagle_layers: Optional[int] = None
max_non_leaves_per_layer: Optional[int] = None
pytorch_eagle_weights_path: Optional[str] = None
+ eagle3_one_model: Optional[bool] = True
@classmethod
def from_dict(cls, data: dict):
@@ -236,6 +248,40 @@ class EagleDecodingConfig(DecodingBaseConfig):
decoding_type: ClassVar[str] = "Eagle"
+class NGramDecodingConfig(DecodingBaseConfig):
+ """
+ Configuration for NGram drafter speculative decoding.
+
+ Arguments:
+ prompt_lookup_num_tokens: int
+ The maximum number of draft tokens to propose (i.e. the maximum length of the output draft).
+
+ max_matching_ngram_size: int
+ The maximum length of the token pattern searched for among existing tokens (i.e. the maximum n-gram size used for matching).
+
+ is_keep_all: bool = True
+ Whether to keep all candidate pattern-match pairs; if False, only one match is kept per pattern.
+
+ is_use_oldest: bool = True
+ Whether to return the oldest match when a pattern hits; if False, the newest match is returned.
+
+ is_public_pool: bool = True
+ Whether to share one pattern pool across all requests; if False, each request keeps a private pool.
+
+ A usage sketch follows this class definition.
+ """
+
+ prompt_lookup_num_tokens: int = 2
+ max_matching_ngram_size: int = 4
+ is_keep_all: bool = True
+ is_use_oldest: bool = True
+ is_public_pool: bool = True
+
+ @classmethod
+ def from_dict(cls, data: dict):
+ return cls(**data)
+
+ decoding_type: ClassVar[str] = "NGram"
+
+
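+# A minimal usage sketch of NGramDecodingConfig (illustrative only; the helper
+# name _ngram_decoding_config_example is not part of this module). The returned
+# object is passed to the LLM args as `speculative_config`; the NGram drafter is
+# only supported with the PyTorch backend (see _setup_speculative_config below).
+def _ngram_decoding_config_example() -> NGramDecodingConfig:
+    return NGramDecodingConfig(
+        max_draft_len=4,  # assumed to be inherited from DecodingBaseConfig
+        prompt_lookup_num_tokens=4,
+        max_matching_ngram_size=2,
+        is_keep_all=True,
+        is_use_oldest=True,
+        is_public_pool=True,
+    )
+
+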
class MTPDecodingConfig(DecodingBaseConfig):
num_nextn_predict_layers: Optional[int] = 1
use_relaxed_acceptance_for_thinking: Optional[bool] = False
@@ -512,7 +558,9 @@ class LookaheadDecodingConfig(DecodingBaseConfig, PybindMirror):
get_default_lookahead_decoding_verification_set(),
description="Number of NGrams in verification branch per step.")
- @validator('max_window_size', 'max_ngram_size', 'max_verification_set_size')
+ @field_validator('max_window_size', 'max_ngram_size',
+ 'max_verification_set_size')
+ @classmethod
def validate_positive_values(cls, v):
if v <= 0:
raise ValueError(f"Value must be positive, got {v}")
@@ -699,7 +747,10 @@ class _ModelWrapper:
return self.model if isinstance(self.model, str) else None
-class LlmArgs(BaseModel):
+class BaseLlmArgs(BaseModel):
+ """
+ Base class for both TorchLlmArgs and TrtLlmArgs. It contains all the arguments that are common to both.
+ """
model_config = {
"arbitrary_types_allowed": True,
"extra": "allow",
@@ -771,20 +822,11 @@ class LlmArgs(BaseModel):
cp_config: Optional[dict] = Field(default_factory=dict,
description="Context parallel config.")
- auto_parallel: bool = Field(default=False,
- description="Enable auto parallel mode.")
-
- auto_parallel_world_size: Optional[int] = Field(
- default=None, description="The world size for auto parallel mode.")
-
load_format: Literal['auto', 'dummy'] = Field(
default='auto',
description="The format to load the model.",
json_schema_extra={"type": "Literal['auto', 'dummy']"})
- enable_tqdm: bool = Field(default=False,
- description="Enable tqdm for progress bar.")
-
# LoRA arguments
enable_lora: bool = Field(default=False, description="Enable LoRA.")
@@ -816,18 +858,9 @@ class LlmArgs(BaseModel):
quant_config: Optional[QuantConfig] = Field(
default=None, description="Quantization config.")
- calib_config: Optional[CalibConfig] = Field(
- default=None, description="Calibration config.")
-
- # BuildConfig is introduced to give users a familiar interface to configure the model building.
- build_config: Optional[object] = Field(
- default=None,
- description="Build config.",
- json_schema_extra={"type": f"Optional[{get_type_repr(BuildConfig)}]"})
-
# Several options from ExecutorConfig, expanded here for less hierarchy
- kv_cache_config: Optional[KvCacheConfig] = Field(
- default=None, description="KV cache config.")
+ kv_cache_config: KvCacheConfig = Field(default_factory=KvCacheConfig,
+ description="KV cache config.")
enable_chunked_prefill: bool = Field(default=False,
description="Enable chunked prefill.")
@@ -850,29 +883,12 @@ class LlmArgs(BaseModel):
default=None,
description="The maximum number of iterations for request stats.")
- workspace: Optional[str] = Field(default=None,
- description="The workspace for the model.")
-
# A handful of options from PretrainedConfig
- embedding_parallel_mode: str = Field(
- default='SHARDING_ALONG_VOCAB',
- description="The embedding parallel mode.")
-
- fast_build: bool = Field(default=False, description="Enable fast build.")
-
- # Once set, the model will reuse the build_cache
- enable_build_cache: object = Field(
- default=False,
- description="Enable build cache.",
- json_schema_extra={
- "type": f"Union[{get_type_repr(BuildCacheConfig)}, bool]"
- })
-
peft_cache_config: Optional[PeftCacheConfig] = Field(
default=None, description="PEFT cache config.")
- scheduler_config: Optional[SchedulerConfig] = Field(
- default=None, description="Scheduler config.")
+ scheduler_config: SchedulerConfig = Field(default_factory=SchedulerConfig,
+ description="Scheduler config.")
cache_transceiver_config: Optional[CacheTransceiverConfig] = Field(
default=None, description="Cache transceiver config.")
@@ -880,8 +896,8 @@ class LlmArgs(BaseModel):
# Speculative decoding parameters
speculative_config: Optional[Union[
LookaheadDecodingConfig, MedusaDecodingConfig, EagleDecodingConfig,
- MTPDecodingConfig]] = Field(default=None,
- description="Speculative decoding config.")
+ MTPDecodingConfig, NGramDecodingConfig]] = Field(
+ default=None, description="Speculative decoding config.")
batching_type: Optional[BatchingType] = Field(default=None,
description="Batching type.")
@@ -889,13 +905,6 @@ class LlmArgs(BaseModel):
normalize_log_probs: bool = Field(
default=False, description="Normalize log probabilities.")
- gather_generation_logits: bool = Field(
- default=False, description="Gather generation logits.")
-
- extended_runtime_perf_knob_config: Optional[
- ExtendedRuntimePerfKnobConfig] = Field(
- default=None, description="Extended runtime perf knob config.")
-
max_batch_size: Optional[int] = Field(default=None,
description="The maximum batch size.")
@@ -916,6 +925,9 @@ class LlmArgs(BaseModel):
description="The backend to use.",
exclude=True)
+ gather_generation_logits: bool = Field(
+ default=False, description="Gather generation logits.")
+
# private fields those are unstable and just for internal use
num_postprocess_workers: int = Field(
default=0,
@@ -988,40 +1000,19 @@ class LlmArgs(BaseModel):
moe_tp_size=self.moe_tensor_parallel_size,
moe_ep_size=self.moe_expert_parallel_size,
enable_attention_dp=self.enable_attention_dp,
- cp_config=self.cp_config,
- auto_parallel=self.auto_parallel)
- if self.parallel_config.auto_parallel:
- self.parallel_config.world_size = self.auto_parallel_world_size
-
- self.auto_parallel_config = AutoParallelConfig(
- sharded_io_allowlist=[
- "past_key_value_\\d+",
- "present_key_value_\\d*",
- ],
- same_buffer_io={
- "past_key_value_(\\d+)": "present_key_value_\\1",
- },
- **infer_cluster_config(),
- )
-
- self.kv_cache_config = self.kv_cache_config or KvCacheConfig()
-
- self.scheduler_config = self.scheduler_config or SchedulerConfig()
-
- # This is used to hold th options for convert_checkpoint
- self._convert_checkpoint_options = {}
+ cp_config=self.cp_config)
@classmethod
- def from_kwargs(cls, **kwargs: Any) -> "LlmArgs":
+ def from_kwargs(cls, **kwargs: Any) -> "BaseLlmArgs":
"""Create `LlmArgs` instance from kwargs.
Args:
kwargs (Any): Arguments passed to `LlmArgs` constructor.
Returns:
- tensorrt_llm.llmapi.llm_utils.LlmArgs: The `LlmArgs` instance.
+ tensorrt_llm.llmapi.llm_utils.BaseLlmArgs: The `BaseLlmArgs` instance.
"""
- kwargs = LlmArgs._maybe_update_config_for_consistency(dict(kwargs))
+ kwargs = BaseLlmArgs._maybe_update_config_for_consistency(dict(kwargs))
ret = cls(**kwargs)
ret._setup()
return ret
@@ -1032,8 +1023,7 @@ class LlmArgs(BaseModel):
Returns:
dict: The dict that contains all fields of the `LlmArgs` instance.
"""
- return dict(
- (field.name, getattr(self, field.name)) for field in fields(self))
+ return self.model_dump()
@staticmethod
def _maybe_update_config_for_consistency(
@@ -1041,18 +1031,18 @@ class LlmArgs(BaseModel):
# max_beam_width is not included since vague behavior due to lacking the support for dynamic beam width during
# generation
black_list = set(["max_beam_width"])
- executor_config_attrs = set(attr for attr in dir(ExecutorConfig)
- if not attr.startswith('_')
- and callable(getattr(ExecutorConfig, attr)))
+ executor_config_attrs = set(
+ attr for attr in dir(_ExecutorConfig) if not attr.startswith('_')
+ and callable(getattr(_ExecutorConfig, attr)))
executor_config_attrs -= black_list
- llm_args_attr = set(LlmArgs.model_fields.keys())
- # NOTE: When cpp ExecutorConfig add new options, please add the new options into `_LlmArgs` with docs as well
+ llm_args_attr = set(BaseLlmArgs.model_fields.keys())
+ # NOTE: When the cpp ExecutorConfig adds new options, please add them to `LlmArgs` with docs as well
# ASK chunweiy for help if you are not sure about the new options.
assert executor_config_attrs.issubset(
llm_args_attr
), f"New options found in underlying ExecutorConfig: {llm_args_attr - executor_config_attrs}"
- # ensure build_config and LlmArgs consistency
+ # ensure build_config and BaseLlmArgs consistency
if kwargs_dict.get("backend") != "pytorch" and kwargs_dict.get(
"build_config"):
# TODO: move this to _perform_config_arbitration() once it's default-on.
@@ -1062,11 +1052,11 @@ class LlmArgs(BaseModel):
build_val = getattr(kwargs_dict["build_config"], field_name,
None)
llmargs_val = kwargs_dict.get(
- field_name) or LlmArgs.model_fields[field_name]
+ field_name) or BaseLlmArgs.model_fields[field_name]
if build_val != llmargs_val:
logger.warning(
- f"Overriding LlmArgs.{field_name} ({llmargs_val}) with build_config.{field_name} ({build_val})."
+ f"Overriding BaseLlmArgs.{field_name} ({llmargs_val}) with build_config.{field_name} ({build_val})."
)
kwargs_dict[field_name] = build_val
@@ -1075,12 +1065,15 @@ class LlmArgs(BaseModel):
def _setup(self):
''' This method will setup the configs right before building the model. '''
+ is_trt_llm_args = isinstance(self, TrtLlmArgs)
+
assert isinstance(self.model,
(str, Path)), f"Invalid model: {self.model}"
- self._setup_embedding_parallel_mode()
+ if is_trt_llm_args:
+ self._setup_embedding_parallel_mode()
- if self.enable_build_cache:
+ if is_trt_llm_args and self.enable_build_cache:
self.enable_build_cache = BuildCacheConfig() if isinstance(
self.enable_build_cache, bool) else self.enable_build_cache
if not isinstance(self.enable_build_cache, BuildCacheConfig):
@@ -1121,7 +1114,8 @@ class LlmArgs(BaseModel):
self.quant_config = self.quant_config or QuantConfig()
- self.calib_config = self.calib_config or CalibConfig()
+ if is_trt_llm_args:
+ self.calib_config = self.calib_config or CalibConfig()
# Note: max_batch_size and max_num_tokens in LlmArgs are for runtime,
# which will be passed to the C++ Executor API, overwriting the values
@@ -1148,8 +1142,9 @@ class LlmArgs(BaseModel):
self.build_config.max_num_tokens = self.max_num_tokens
# TODO: remove the checker when manage weights support all data types
- if self.fast_build and (self.quant_config.quant_algo is QuantAlgo.FP8
- or self.quant_config.quant_algo is None):
+ if is_trt_llm_args and self.fast_build and (
+ self.quant_config.quant_algo is QuantAlgo.FP8
+ or self.quant_config.quant_algo is None):
self._update_plugin_config("manage_weights", True)
if self.parallel_config._world_size == 1:
@@ -1162,9 +1157,12 @@ class LlmArgs(BaseModel):
if self.max_lora_rank is not None:
self.build_config.lora_config.max_lora_rank = self.max_lora_rank
+ self._setup_speculative_config()
+
if self.enable_prompt_adapter:
self.build_config.max_prompt_embedding_table_size = self.max_prompt_adapter_token * self.build_config.max_batch_size
+ def _setup_speculative_config(self):
if self.speculative_config:
if isinstance(self.speculative_config, LookaheadDecodingConfig):
lookahead_config = self.speculative_config
@@ -1194,7 +1192,7 @@ class LlmArgs(BaseModel):
self.build_config.max_draft_len = self.speculative_config.max_draft_len
if self.backend != 'pytorch':
- eagle_config = EagleConfig(
+ eagle_config = _EagleConfig(
self.speculative_config.eagle_choices,
self.speculative_config.greedy_sampling,
self.speculative_config.posterior_threshold,
@@ -1207,9 +1205,25 @@ class LlmArgs(BaseModel):
from tensorrt_llm._torch.speculative import Eagle3Config
self.speculative_config = Eagle3Config(
max_draft_tokens=self.speculative_config.max_draft_len,
- eagle_weights_path=self.speculative_config.
- pytorch_eagle_weights_path)
-
+ draft_model_path=self.speculative_config.
+ pytorch_eagle_weights_path,
+ eagle3_one_model=self.speculative_config.
+ eagle3_one_model)
+ elif isinstance(self.speculative_config, NGramDecodingConfig):
+ self.build_config.speculative_decoding_mode = SpeculativeDecodingMode.NGRAM
+ assert self.backend == 'pytorch'
+ assert self.speculative_config.prompt_lookup_num_tokens > 0 and self.speculative_config.max_matching_ngram_size > 0
+ self.build_config.max_draft_len = self.speculative_config.max_draft_len
+ from tensorrt_llm._torch.speculative import NGramConfig
+ self.speculative_config = NGramConfig(
+ prompt_lookup_num_tokens=self.speculative_config.
+ prompt_lookup_num_tokens,
+ max_matching_ngram_size=self.speculative_config.
+ max_matching_ngram_size,
+ is_keep_all=self.speculative_config.is_keep_all,
+ is_use_oldest=self.speculative_config.is_use_oldest,
+ is_public_pool=self.speculative_config.is_public_pool,
+ )
elif isinstance(self.speculative_config, MTPDecodingConfig):
from tensorrt_llm._torch.speculative import MTPConfig
self.speculative_config = MTPConfig(
@@ -1350,32 +1364,385 @@ class LlmArgs(BaseModel):
f"Invalid embedding_parallel_mode: {self.llm_args.embedding_parallel_mode}"
)
- def _validate_kv_cache_config(self):
- if self.kv_cache_config is None:
- raise ValueError("KvCacheConfig is required for streaming LLM.")
- if self.kv_cache_config.max_attention_window is None:
- raise ValueError(
- "KvCacheConfig.max_attention_window should be set for streaming LLM."
- )
- if any(i <= 0 for i in self.kv_cache_config.max_attention_window):
- raise ValueError(
- "Elements in KvCacheConfig.max_attention_window should be greater than 0."
- )
+class TrtLlmArgs(BaseLlmArgs):
- if self.kv_cache_config.sink_token_length is None:
- raise ValueError(
- "KvCacheConfig.sink_token_length should be set for streaming LLM."
- )
- if self.kv_cache_config.sink_token_length <= 0:
- raise ValueError(
- "KvCacheConfig.sink_token_length should be greater than 0.")
+ auto_parallel: bool = Field(
+ default=False,
+ description="Enable auto parallel mode.",
+ deprecated=
+ "Use tensor_parallel_size/pipeline_parallel_size/xxx_parallel_size instead.",
+ )
+ auto_parallel_world_size: Optional[int] = Field(
+ default=None,
+ description="The world size for auto parallel mode.",
+ deprecated=
+ "Use tensor_parallel_size/pipeline_parallel_size/xxx_parallel_size instead.",
+ )
+
+ enable_tqdm: bool = Field(default=False,
+ description="Enable tqdm for progress bar.")
+
+ # BuildConfig is introduced to give users a familiar interface to configure the model building.
+ build_config: Optional[object] = Field(
+ default=None,
+ description="Build config.",
+ json_schema_extra={"type": f"Optional[{get_type_repr(BuildConfig)}]"})
+
+ workspace: Optional[str] = Field(default=None,
+ description="The workspace for the model.")
+
+ # Once set, the model will reuse the build_cache
+ enable_build_cache: object = Field(
+ default=False,
+ description="Enable build cache.",
+ json_schema_extra={
+ "type": f"Union[{get_type_repr(BuildCacheConfig)}, bool]"
+ })
+
+ extended_runtime_perf_knob_config: Optional[
+ ExtendedRuntimePerfKnobConfig] = Field(
+ default=None, description="Extended runtime perf knob config.")
+
+ calib_config: Optional[CalibConfig] = Field(
+ default=None, description="Calibration config.")
+
+ embedding_parallel_mode: str = Field(
+ default='SHARDING_ALONG_VOCAB',
+ description="The embedding parallel mode.")
+
+ fast_build: bool = Field(default=False, description="Enable fast build.")
+
+ # Private attributes
+ _auto_parallel_config: Optional[AutoParallelConfig] = PrivateAttr(
+ default=None)
+ # This is used to hold the options for convert_checkpoint
+ _convert_checkpoint_options: Dict[str,
+ Any] = PrivateAttr(default_factory=dict)
+
+ @property
+ def auto_parallel_config(self) -> AutoParallelConfig:
+ return self._auto_parallel_config
+
+ @print_traceback_on_error
+ def model_post_init(self, __context):
+ super().model_post_init(__context)
+
+ self._auto_parallel_config = AutoParallelConfig(
+ sharded_io_allowlist=[
+ "past_key_value_\\d+",
+ "present_key_value_\\d*",
+ ],
+ same_buffer_io={
+ "past_key_value_(\\d+)": "present_key_value_\\1",
+ },
+ **infer_cluster_config(),
+ )
+
+ self.parallel_config.auto_parallel = self.auto_parallel
+
+ if self.parallel_config.auto_parallel:
+ self.parallel_config.world_size = self.auto_parallel_world_size
+
+
+LlmArgs = TrtLlmArgs
LLMARGS_EXPLICIT_DOCSTRING = generate_api_docs_as_docstring(LlmArgs,
indent=' ' * 4)
+class LoadFormat(Enum):
+ AUTO = 0
+ # Initialize all weights randomly.
+ DUMMY = 1
+
+
+class TorchLlmArgs(BaseLlmArgs):
+
+ # Just a dummy BuildConfig to allow code reuse with the TrtLlmArgs
+ build_config: Optional[object] = Field(
+ default=None,
+ description="Build config.",
+ exclude_from_json=True,
+ json_schema_extra={"type": f"Optional[{get_type_repr(BuildConfig)}]"})
+
+ # PyTorch backend specific configurations
+
+ use_cuda_graph: bool = Field(
+ default=False,
+ description=
+ "If true, use CUDA graphs for decoding. CUDA graphs are only created for the batch sizes in cuda_graph_batch_sizes, and are enabled for batches that consist of decoding requests *only* (the reason is that it's hard to capture a single graph with prefill requests since the input shapes are a function of the sequence lengths). Note that each CUDA graph can use up to 200 MB of extra memory."
+ )
+
+ cuda_graph_batch_sizes: Optional[List[int]] = Field(
+ default=None,
+ description="List of batch sizes to create CUDA graphs for.")
+
+ cuda_graph_max_batch_size: int = Field(
+ default=0, description="Maximum batch size for CUDA graphs.")
+
+ cuda_graph_padding_enabled: bool = Field(
+ default=False,
+ description=
+ "If true, batches are rounded up to the nearest cuda_graph_batch_size. This is usually a net win for performance."
+ )
+
+ disable_overlap_scheduler: bool = Field(
+ default=False, description="Disable the overlap scheduler.")
+
+ moe_max_num_tokens: Optional[int] = Field(
+ default=None,
+ description=
+ "If set, at most moe_max_num_tokens tokens will be sent to torch.ops.trtllm.fused_moe at the same time. If the number of tokens exceeds moe_max_num_tokens, the input tensors will be split into chunks and a for loop will be used."
+ )
+
+ moe_load_balancer: Optional[Union[object, str]] = Field(
+ default=None,
+ description="Configuration for MoE load balancing.",
+ json_schema_extra={"type": "Union[MoeLoadBalancerConfig, str]"})
+
+ attn_backend: str = Field(default='TRTLLM',
+ description="Attention backend to use.")
+
+ moe_backend: str = Field(default='CUTLASS',
+ description="MoE backend to use.")
+
+ mixed_sampler: bool = Field(
+ default=False,
+ description=
+ "If true, will iterate over sampling_params of each request and use the corresponding sampling strategy, e.g. top-k, top-p, etc."
+ )
+
+ enable_trtllm_sampler: bool = Field(
+ default=False,
+ description=
+ "If true, will use the TRTLLM sampler instead of the PyTorch sampler. The TRTLLM sampler has a wide coverage of sampling strategies."
+ )
+
+ kv_cache_dtype: str = Field(default="auto",
+ description="Data type for KV cache.")
+
+ use_kv_cache: bool = Field(default=True,
+ description="Whether to use KV cache.")
+
+ enable_iter_perf_stats: bool = Field(
+ default=False, description="Enable iteration performance statistics.")
+
+ enable_iter_req_stats: bool = Field(
+ default=False,
+ description=
+ "If true, enables per request stats per iteration. Must also set enable_iter_perf_stats to true to get request stats."
+ )
+
+ print_iter_log: bool = Field(default=False,
+ description="Print iteration logs.")
+
+ torch_compile_enabled: bool = Field(
+ default=False, description="Enable torch.compile optimization.")
+
+ torch_compile_fullgraph: bool = Field(
+ default=True,
+ description="Enable full graph compilation in torch.compile.")
+
+ torch_compile_inductor_enabled: bool = Field(
+ default=False, description="Enable inductor backend in torch.compile.")
+
+ torch_compile_piecewise_cuda_graph: bool = Field(
+ default=False,
+ description="Enable piecewise CUDA graph in torch.compile.")
+
+ torch_compile_enable_userbuffers: bool = Field(
+ default=True,
+ description=
+ "When torch compile is enabled, userbuffers is enabled by default.")
+
+ autotuner_enabled: bool = Field(
+ default=True,
+ description="Enable autotuner only when torch compile is enabled.")
+
+ enable_layerwise_nvtx_marker: bool = Field(
+ default=False, description="If true, enable layerwise nvtx marker.")
+
+ auto_deploy_config: Optional[object] = Field(
+ default=None,
+ description="Auto deploy config.",
+ exclude_from_json=True,
+ json_schema_extra={"type": f"Optional[AutoDeployConfig]"})
+
+ load_format: Union[str, LoadFormat] = Field(
+ default=LoadFormat.AUTO,
+ description=
+ "How to load the model weights. By default, detect the weight type from the model checkpoint."
+ )
+
+ enable_min_latency: bool = Field(
+ default=False,
+ description=
+ "If true, enable min-latency mode. Currently only used for Llama4.",
+ )
+
+ @field_validator('load_format', mode='before')
+ @classmethod
+ def convert_load_format(cls, v):
+ if isinstance(v, LoadFormat):
+ return v
+ load_format = v.upper()
+ if load_format not in LoadFormat.__members__:
+ raise ValueError(f"Invalid LoadFormat: {v}")
+ return LoadFormat[load_format]
+
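+ # Examples of the normalization above (illustrative): load_format="dummy" becomes
+ # LoadFormat.DUMMY, a LoadFormat enum value passes through unchanged, and an
+ # unrecognized string such as "foo" raises ValueError.
+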
+ # Extra resource managers to use in addition to the KV cache manager.
+ # Each manager's prepare_resources method is called before the forward pass,
+ # and update_resources() is called after the pass finishes. free_resources()
+ # is called when a request finishes. The KV cache manager is guaranteed to
+ # be invoked after all of these extra managers in all stages.
+ _extra_resource_managers: Dict[str,
+ object] = PrivateAttr(default_factory=dict, )
+
+ @property
+ def extra_resource_managers(self) -> Dict[str, object]:
+ return self._extra_resource_managers
+
+ @extra_resource_managers.setter
+ def extra_resource_managers(self, value: Dict[str, object]) -> None:
+ self._extra_resource_managers = value
+
+ @print_traceback_on_error
+ def model_post_init(self, __context):
+ from .._torch.model_config import MoeLoadBalancerConfig
+
+ super().model_post_init(__context)
+ self.model_format = _ModelFormatKind.HF
+
+ if isinstance(self.moe_load_balancer, str):
+ if not os.path.exists(self.moe_load_balancer):
+ raise FileNotFoundError(
+ f"MoE load balancer config file not found: {self.moe_load_balancer}"
+ )
+ try:
+ with open(self.moe_load_balancer) as f:
+ moe_load_balancer_config = yaml.safe_load(f)
+ self.moe_load_balancer = MoeLoadBalancerConfig(
+ **moe_load_balancer_config)
+ except Exception as e:
+ raise ValueError(
+ f"Failed to load MoE load balancer config file: {self.moe_load_balancer}"
+ ) from e
+
+ # TODO: Remove this after the PyTorch backend is fully migrated to TorchLlmArgs from ExecutorConfig
+ def get_pytorch_backend_config(self) -> "PyTorchConfig":
+ from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig
+
+ # TODO: Remove this after the PyTorch backend is fully migrated to TorchLlmArgs from ExecutorConfig
+ # Just a WAR to support the auto_deploy
+ if self.auto_deploy_config is not None:
+ return self.auto_deploy_config
+
+ return PyTorchConfig(
+ extra_resource_managers=self.extra_resource_managers,
+ use_cuda_graph=self.use_cuda_graph,
+ cuda_graph_batch_sizes=self.cuda_graph_batch_sizes,
+ cuda_graph_max_batch_size=self.cuda_graph_max_batch_size,
+ cuda_graph_padding_enabled=self.cuda_graph_padding_enabled,
+ disable_overlap_scheduler=self.disable_overlap_scheduler,
+ moe_max_num_tokens=self.moe_max_num_tokens,
+ moe_load_balancer=self.moe_load_balancer,
+ attn_backend=self.attn_backend,
+ moe_backend=self.moe_backend,
+ mixed_sampler=self.mixed_sampler,
+ enable_trtllm_sampler=self.enable_trtllm_sampler,
+ kv_cache_dtype=self.kv_cache_dtype,
+ use_kv_cache=self.use_kv_cache,
+ enable_iter_perf_stats=self.enable_iter_perf_stats,
+ enable_iter_req_stats=self.enable_iter_req_stats,
+ print_iter_log=self.print_iter_log,
+ torch_compile_enabled=self.torch_compile_enabled,
+ torch_compile_fullgraph=self.torch_compile_fullgraph,
+ torch_compile_inductor_enabled=self.torch_compile_inductor_enabled,
+ torch_compile_piecewise_cuda_graph=self.
+ torch_compile_piecewise_cuda_graph,
+ torch_compile_enable_userbuffers=self.
+ torch_compile_enable_userbuffers,
+ autotuner_enabled=self.autotuner_enabled,
+ enable_layerwise_nvtx_marker=self.enable_layerwise_nvtx_marker,
+ load_format=self.load_format,
+ enable_min_latency=self.enable_min_latency)
+
+ @field_validator('cuda_graph_max_batch_size')
+ @classmethod
+ def validate_cuda_graph_max_batch_size(cls, v):
+ """Validate cuda_graph_max_batch_size is non-negative."""
+ if v < 0:
+ raise ValueError("cuda_graph_max_batch_size must be non-negative")
+ return v
+
+ @staticmethod
+ def _generate_cuda_graph_batch_sizes(max_batch_size: int,
+ padding_enabled: bool) -> List[int]:
+ """Generate a list of batch sizes for CUDA graphs.
+
+ Args:
+ max_batch_size: Maximum batch size to generate up to
+ padding_enabled: Whether padding is enabled, which affects the batch size distribution
+
+ Returns:
+ List of batch sizes to create CUDA graphs for
+ """
+ if padding_enabled:
+ batch_sizes = [1, 2, 4] + [i * 8 for i in range(1, 17)]
+ else:
+ batch_sizes = list(range(1, 32)) + [32, 64, 128]
+
+ # Add powers of 2 up to max_batch_size
+ batch_sizes += [
+ 2**i for i in range(8, math.floor(math.log(max_batch_size, 2)))
+ ]
+
+ # Filter and sort batch sizes
+ batch_sizes = sorted(
+ [size for size in batch_sizes if size <= max_batch_size])
+
+ # Add max_batch_size if not already included
+ if max_batch_size != batch_sizes[-1]:
+ batch_sizes.append(max_batch_size)
+
+ return batch_sizes
+
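+ # Worked example (illustrative): with padding enabled and max_batch_size=64,
+ # _generate_cuda_graph_batch_sizes returns [1, 2, 4] plus multiples of 8 up to
+ # 64, i.e. [1, 2, 4, 8, 16, 24, 32, 40, 48, 56, 64]; without padding it returns
+ # [1, 2, ..., 31, 32, 64]. The validator below either checks a user-provided
+ # cuda_graph_batch_sizes list against this generator or fills it in from
+ # cuda_graph_max_batch_size (0 is treated as 128).
+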
+ @model_validator(mode='after')
+ def validate_cuda_graph_config(self) -> 'TorchLlmArgs':
+ """Validate CUDA graph configuration.
+
+ Ensures that:
+ 1. If only cuda_graph_batch_sizes is provided, cuda_graph_max_batch_size is set to its maximum value
+ 2. If only cuda_graph_max_batch_size is provided (0 is treated as 128), cuda_graph_batch_sizes is generated from it
+ 3. If both are provided, cuda_graph_batch_sizes must exactly match the values generated for cuda_graph_max_batch_size
+ """
+ if self.cuda_graph_batch_sizes is not None:
+ self.cuda_graph_batch_sizes = sorted(self.cuda_graph_batch_sizes)
+ if self.cuda_graph_max_batch_size != 0:
+ if self.cuda_graph_batch_sizes != self._generate_cuda_graph_batch_sizes(
+ self.cuda_graph_max_batch_size,
+ self.cuda_graph_padding_enabled):
+ raise ValueError(
+ "Please don't set both cuda_graph_batch_sizes "
+ "and cuda_graph_max_batch_size.\n"
+ f"cuda_graph_batch_sizes: {self.cuda_graph_batch_sizes}, "
+ f"cuda_graph_max_batch_size: {self.cuda_graph_max_batch_size}"
+ )
+ else:
+ self.cuda_graph_max_batch_size = max(
+ self.cuda_graph_batch_sizes)
+ else:
+ max_batch_size = self.cuda_graph_max_batch_size or 128
+ generated_sizes = self._generate_cuda_graph_batch_sizes(
+ max_batch_size, self.cuda_graph_padding_enabled)
+ self.cuda_graph_batch_sizes = generated_sizes
+ self.cuda_graph_max_batch_size = max_batch_size
+
+ return self
+
+
def update_llm_args_with_extra_dict(
llm_args: Dict,
llm_args_dict: Dict,
diff --git a/_images/8x_l20_L40S_node_architecture.png b/_images/8x_l20_L40S_node_architecture.png
new file mode 100644
index 0000000000000000000000000000000000000000..725427f163c4ff957eebbf9c19d771b472f68eb9
GIT binary patch
literal 267638
zG|g{pRcrzG>D!3Sg<<}}!>Pp2=N-sOMbA%7e96}Kb14xfv<;;jNf5=a97ehnQ-8md
z;%xzm=u%C97;9XR%GZ^S@|9wPSB>Yrj(~N()bV8}LZWm(rpZiSj)#Yd5hw3s(Xk*n0%v2i+URBVRZjk9_Hv%8I0c@(zi%
z!3Qmrd!^b7g-xSawam8y?OIsw`foezQ$jR(w(tQ{A)0w7)NY9lLA3ca#RK#7=q*v$x%Fzb!ESduC&*zU-TWT8e5YkLS#SyJ>I
z&!UnP*0|>c?0;Ge{xcX1r|`uZvs~0$#G%n2%L*msM6bmj2SY
zuHZ0uP*-N(J$9I{!$dmaCj
z(&aAGy0wC<1KwiXc$PA}{#4cM0Fli|a4pgNqUUPStoA~oIdLW4%d)v@scq|NL}Bxi
zM~z2lW5-`G+_X#~8l5|BUiw9WY-Vr&(c8y1DzdEi{Rwl9B@bmQ4W^a5K*m3)7dgp!
z@+1MfQ^~%#ElqE-Q&@D&eL+MM<^kWHflpwT8GXhJh;_K`uiHLOB9*on@>h@b=4jGi
zY)uq3v);7O6rkt$D>9=H5#!DA*};-A8)8*GCa~8nG2-_EFIHt?TlveK9KeHP$nrN_
z519V?LI>ftBY7fFoO)HyqXc|394G~IHC7_ErMaEY8mZ^?xNoSXhY}qfZ0}w;N3ggj
zd2DQdKuM~y;eJ`#zjDiNwkFS#71e-IC
z&oM{aER^A9*M*k7z_+pm<A7p&m~v3Xgxx&)<^c(hBZ%o
zDW}O&$t#nG*&DW*_3mBMu?qd&M2Ya@4sWy#S|>Q$MC`;eJv+IPi8VVHYz-%E@+2Xw
zMlI3ah#PN^j{j6!DtbZ$kD0dS;cA76|CGtkr?qvsd0LPYR5e4q>Isb!n1R4Oe`p~E
z!)dTo=;K;3wFby?vj9=9nT&m|nVC@~=-eet_;dy|4{t{m&|NL9;3!?!;uglO@f?O#<5$Qi}
zRV*hhT^j7cJ!|1MJ;zER)Ls`+OT<0!HFPF5jQP6g8~f}
zn$2uSprE1Eh!a{h_Cw3p!(?KB?(a#4vHg=+?ss!GpS3Svj>IoyB_?>tajBAWyYQD@xr{@{dYmM2DY}voz27}?`Hb}$jNdax
zy`)DM>Yks3nSD8*pWidKmXf>zdC?V|cyh9D^P#iF_{IsOSp)obPL77Kba{;esng!3
zG!}O}O-lIC5EP95rO>rIiN>{$1Scat?!WDm?Ph-QlBz%XdOvE{=2P@=4FJBuEjyT5
zQ+(wJM_n)D_@W-G-(GRzHqDi={sU_~8NW0#AEy=^E99fXe2ytPB~JQ^HuriapC}-c
z15c|he?g9*8d@qP$=0HYK6io64^V^fC-91RR)$hnuLJv@e>WI~-UD|-FdD7`!)3@8
zQcyT+CX853)tN3K<}a=`8Qu=xrm_Q#x#`6%uF8{IW>gf@^CrDUibc<>6Er#eHPJGR
zRur48op(9VF4*Wv?CCHuZYN}(rZG^pAg7qz!P6Y^34KB$bB&WEk_-HDpVF|R`AwT2
zjm=(Iw|Y{@*9M8a@CK6&08>Bxz(-l43wh1Y&8hP61>qrL+Hak%ygM(eM=zlXrpPt~
zDb*E&*1_bXiloruw?01mYSiB+{0~V!=RNld#9Hgw1$FI-y#!B(O%!ynt3=vVqUwI+
z7C}!55&Jp5z~s@#F|EP8Wk9^~G3#w_kbJGhbG!3#gk^C@zKM67Zk5t(|E&hons?i0
zH2y(tN#CGrb4%R>haD*dVIC71Ug)^(5!CyJ20@%X%Jp*mH!_y$TLCr*(yyni;?v2i
zZ9{q+iLVg)_f&EnYt$rpHF-JUSoF$+kzn9pw4^ak3V&|`AOEh75F6R;2L|WMhZ?au
zLQCkcHMz174qaC3IcjK_;q?p{iWmoC(4Mz$tp`^%6MH^i%ImP#_RR8g1d
zp*!#+Pc?2xyVn=61$n5s&oZ(2YzO^3lmAQe{e_-$FhRdgn9z@e56VE6R+B{jyp?L5
z@P@g7ZVO{QjeP9q_R{2dv5p&iOZgqFF67A8EmODq5PSZ#@xC+me@dnE5iV3oLnE4j
zk8}FvR#nK=_TFb=ccbC&$Dr8~7T=AA_AkkOA*FJ23K3t4_UVQSMsU=EYg&;)$X(6b
z#r*vtICcHxQw>wvq+|aA+xU{o%nDcR)tw~{{@Nt%)eIo3!7LCz1oMTwRLlvMH}1rH
zPtF0$nNdv>N&NwG*xgo_eZtxbgRli_+Psbf@Y&u8y72&wD^y+>emUxnw;6buy6Cam
zxs~8mDtyFqqStN*AL3mBRqEXgOmQy*D92k(k*SGoCV#XiZsQ2EC3ZQ_M&yXy3%pE-
zA5DbCZOI4d1EB{a9-^I?0K5zwgAqqKH!=tdkIjs5q9os8bb=yq$7%Yc=2CXmuIgu%
z%X{p{?wd*mTcqvVL!)j011sRoiV-b`f2JFxlZtKBo6P@SnApbSCQynm7iAh4NF={C
zg`U?&Bsca?sbUOM6MmKtu&3SWb;kzEM^Ny-aTVU~R3zt~3^zND!
z9H;A^_MNX_)lrqGkhY4Umg?9Q`{_DIrrI7&{lJOAoa=|Py#a?nd=yAWDWhrUldU?n
zO9|r_C~bxgvE5c(c2^=(z3Juf>g5|iF9>w@l7&fSSO^XO{!AzXlslSUDOep)iC$9s
zoifXKt9G&74(=@&AuGakO!%dz>cro6b0C9^Z)8QaA+|qoU9zZYrjlC+cXsi;rCecG
z&hfR*qui*WAfK$1Alz*d#GgRnFXDe$OHNHu2mBRHO0QgPujyd$A3XbE=&}@fr83mw
zBp@+p+`FU|#F=ZQ$z0IFe3DsyjdYDvs23Z}BMp~)s4;}qL;SEk*Ese-f*&c0vP>iF
zBtESz(!InQGWq6uiym-d8{y!FKApi6%(vH!Uok973Y+Dr&VL+b37$C(NN>CjS0-|9
zHnNqKu9-8>DTwim1x?|CavhWmIXV5h1(28$3&hOqSoKVB@5I8Gd9d=y=B`mL@a;WP
zmGumkA<83hcN>^AuZVjp9_zf#d`we@_qWhbe$4_Gwj4;)VFqLc*QQbAm+&R
z4=bcghRqMmvCXg&)bmuBG|r{^%6{u}o>?UU%Kb5@pRQTjSBLG$PRDz<@8d*d7_meg
zoV^?b%#(C19Gio(xl@=q5`MrON$)vKV_3|4XJOinKD})L8gHdOgeo|HrgGzbI~8zk
z?ZrSzb`B`Xdc`V5-dmP&J~I)Sn}Y3^B!h`}Cw5n8K$1Ub69>mJroMj*8Q*L5;!2=u
zy*Vu%lcFWu8{hV>sp|h>@4cg%TDSI53nFf?Ac%An+(9;-(5p(900IiqOK71(=v71^
zbP*6jl_H2zr1zq92t=BpgH-7yK4}i<2@uhYiy$0}~enGTdBbfvxP^th-tp3EH{P+V1mTMt)
zn|GK&W1p^mgDxB3KCw?I7+Q1fOfn!GHG9bthkn?uaZ{!`lXE~fR-R_8^KjO-2j!X*
zVF2(VCme9laG9dD=3jmoSn}MVqk&c!Jls+FM
zTc)IOZ&kmp>C~8{H33TZ6GMXoOtzFmwZQPsTNWUF&=!Ffi7?@^bVtKc3t(TM<1Q(MoxPpb`H9TfeY}!u_0Jnb_oTy9Y(Je4~@&}Lu7(_{V!A*TOM4T_D
z_6hySOiHj#bWUfmOLWjNgzr%TjN&B>mX*UlC8^>3O$D*cH-^!wNdLXoon(hn|+`D{qY78tB~mlJrV5m2Gmgl#Ot&T6@avcuv4z33Nb<@9sk7
zF5_L%Q#%SkqWNC@E3#)j#PToFN)FOf(aL013IsbZ;Xwbo{saypVesSLvz@V6$zP4phceKci4ltQS
zeDg8|4~uQiMu?{Ap`k%m4vZhZovyxbR1s3gy9*%qSj{U%i2+}mv4(il2Om18_ui_~
z6CQZ=;KJeWmGOWITS)1mDW2LSH|U>G(mNho6p
zv}cbBM~0+LajZtDSWB}sSCE)_l|^0Ki43+r)hCj=L_7{1sC<3n0K`2zNy-b0_5z~Q4E@iM9EHWk}v
zg&k)ko;EE5DeiHm5D^I=kFW`HP1gu_@=A;V1$cRvdEqqu9piYdIU=9C_+D($wI3%O
zTOuuJjqeP7Nl_0TkqASA5}<(z7i+hnm=VQz=<5u!iS;5N3M3Oaqxx6
zt8Z?@fv_VZztT?<2UQs{Evd#*opYxWUpBs)IFfy^SovKn^jJMvOS7(?%@8-mSWQJx
z+5_mIw9p3~g=Z#>qJEO+%Kwv@0Cg7r#ii0B10@z6e{AnGIFl8;oW~bt8@(Gw-XkXs
z$3CGrNRNRWF)&oAPYv#H?KgEnUi5=q(;eM?+f8kTL+Hbuf%9}s(U;}Xwu=g5^?f(
zl3wy*&Z`}gU4$@5kJQ6%K%V0i{>&{fX^A+pleG&Ox8TwC298tinSf_CKxYGjx*7A8
zoc#S&$JKqKsI_juJ3v01CzA2R$qCS-^beI(3!LT`@d+K^9JGCE`zI(0LdxMXOzVi|hc~Uyc
zG`T(xNmq`B^UCg;jMSQ+;WN4_@EQrE*eC(T;3d&aCao(G(?j1qfkT^Gy_vLvUU_%l
zYSm%?my9VEDfSUdz7dkmc+7({p~l`!>)KRLMEqKI+{?V`77S^USN%7&xi%eE$sYL>
zULYGT@H|!7J|=_gYKD!ttVrvx`@p2$Mu<$zx^ZF)K`wQ?Mb*WWv5tmXm&k$pv)@nl
zVQUtqUaPj|J&gdcmfv@#xhP%`)7n<|efThU%o0j|n&K0{4s}n2s4H{ars*%}h*|(Pqj>@uIzPl!l@TaN>h}AIm~&1(bJq
z*|frWfy#YP*%V$eAHjf*JGod>B50SZ4y0Gv1j;Crt$5H5V_HgJD$^|O;+6I%a6bD8;2J-o{+oTxLdG7GiV>$iJy)f~kD;X+u
z1H?4Y<0q{p(gir;F{Qv)K$BQud<`I5uHy|r+#P`h3<9X^y1z_ZTt?5vp^
z=n$oG@^Z%JrE+$twdMo-AYCg@YPV{PMk}wvxV?O#>svWg{2nbjcN^MVxVMy)Dpko$
zwsEyMs&}SZku(|I2q6r@r*P7<>-N&A6R+j-wa=ze1(=8^k>ca%}(4+qQ
zYZ|IFEBUQ>^D(zr7bic?MY;nCrc20Skq8Bk&_!`Ey~rh}@VbBszkN!A^1+sy3%0m1
z{X%G3OeB&ptd_O^a69#EeS+PoQqzLvKO2JbpxJTcst|;-44++D$TS+mUx_)Rt74Ea
zDWGU?60K&;*JD~nKo7tl;H8*hlX`JdVciKr}pM
zZtvmNU}05q2d5VStlhF(Kkoq}jS+yY-3fwsXu&
z!MWqVNLL6H4p1Ix${fwdoYf_HG@7q@Yk%LAzQ(fiJkoQZ^yF1boA@{-BuP+ki
zjO!lNSWyVPmQ%n<|26VG-A6)~Tkz@WOp_1Ox{Ws1UxSN28uI-ja^+Q79H#1Dag~OP
zgYJ;r^m+&7#dorl-HpZfh%ex#hSIqdBUe0%SF1ZjbBY%WnjhtFD8%?OTFxfiCZ?D!O3CrtVX1NT5at|tZk6kPFk4|Hiqx4_ZN+s;HUMkRZ
z7*QfjsIZ)Z=OoiVM%_`Mbe2!2Uos?&EqaF8Fa8X(2%5S&C#o(sxbZ9B=Y@S`(<{<#
zrt`0Pd2Ocj9<#!S_mnQQV+Jiv{NkrzkIdhr7^B;!sng?RRT=HjmO_rl3#xveg82C5
zFlur+h${_8z8g#~vfa@P&a9^$e$@W%vDqBz4sw(Q!NXbFeK@Ezx23*_qQt0shtBYM
zbUQ!MirL$gXDPFqhDH
z-qVgUITvDLT4y;y_wFNu+#6p-q7-}N5Lsz$nf)S=T?yI>1^Cc+&O)dg)$7043`h}7OYQcP55A8pvT(k}
zF_YPbZQo;m@-&UBK16nS$pkHRipOnRq{%qp
zVDHS8UpPbcBv(nJUe{^7LqjSpmqTX#NLlXYw)O}eWNTA<;UuHLIdw}UUp~3`tue>=
z@WaYe**=EL|MZ6xmCSxH@rT9iK$dew#pwAH%DiBQD6Y;Qw<)%Oc9p@<)neBCO9s*J
z(_c}aOLuhDfUv9(IyfAU)b;t!!Fm&&1-^S^(2ZF}pwem5Ux4Vu1m
zCM_2Jpf^2*n0pi-U|vyq5ysDPcXqyiQ;20dDtmvM(D9L%o2!EcjY<1_DEooN^qbxNSU^WJ0!FB7o+(?ItPJYTW!Ak0@$viG
zgx9=e+gj#Vux&2G3l`IDn768Nh0Vo?Ew+y1ZQqy5E2S5u)lE(;XYHfncbGbVO
z^Yv>NFgkPPKdg!Kz3RE(&7Wr^A{J71BhvEoe9Lu$>K=J~6zfni%sL#kII5QXRw!|`
zeBZ`^AfYA_+;9S%+VW2jr1gahwK>cGbd;dFD9nvp(l%0yl~mK+O73!_BDj|*yWG?N
z+FgS^$Ax=4Za+|RO3f@v`;Y1f*-k~Xxeto^(S>6EmUgtjjn?GF)|_(&^ec2<{uzGDn|Bkm5|F7
zIpKCjNwL9>QsPYU`l~_v{3(7HtC?1-ar~iKc4Zkrd
z?6vu|-KXVR_^&Ebbge#p#Cp-z$Sa_A5y!S5$BytPW~qF`J^uTz9cv?4zUZ3f+dj&?
z7}xC2Ccb*G=yml@>U@fr{d^jSX1%U$(~tUERT0s^C5IYEPF<0>v{{18HgZ(!Xhp%L
zE_vEct5L>PVp>~dG2F|>@k*ceqJ5}KiK=a1ZJgSJ5qhlRcBg<%`S@v0-lz6=AN;$<
zTNE+yFMUMZ(f6BfA}y&4A*O`%OVxdR-^DTK%+vU&iGT1YmzmJgvjKDCncNR;ITsLg
z_Pb=fQBbgZJzdr~PCA+1d|>b(F6!+HA}D{T)>H?PagcV*5Ylq)90YfllrZ6g6XJza
zJJ8WsdZ`^WCygsKP7a)e@BHw6pRwBdw7>9)2H#4GoLFx*&zH1wT{z7fAF&O=Kz}y$WlXR&fS(
zff;JRCf`bZy2F;J4Fp~{Jgf`fa9-nb3|7KmDOAB-ibEG#Krgl31@z`XQR*wm{>_8XzIE@VbOtWMl{
zDXx*}SYEIJR>|m3)8gV&*+Is}`KqI>Y$+cVx}Ut(y}tkYV07p{kaZ_(2~d^;e^-vP
zYaw+Hor>;SP|{yiFBaiEvYT@rUV6n0Z#OmhQ~;XX7ND2T%kBiQY9FVaW2nf{BB{ap^HVqr@U-Bw{Ex?f><%ONwd7<)%ufc_>?*rq?z!jpFRa-0N=r{lVo5m(OrMZM%(9G7FGy~0T(MCowCD%
z
zjtzh!)=lsOx|cbNrjJVdr!a5H#BZh?^@k&r#i3~pJVt*VP6On9hXq@?%pa9ll6t@Q
zSO!srSq1r-KcAsQkC_SAQ7v-dLSmk&VG1uk7=%F?jX@a9pqaQyUxjztevL{xj_IJE{Kr
z#^9g+`>#KT=)dFee?JZ@q{#hK>UXfFurFKcXaOYmmiha&%m-F=XMtqQr_9<5pqASf
z$UXQ`bM;LLH;`J{0{HGFB>-{wq*Y+g$^Gs7Ozqy%oR
z1G45H0(9M50RSl~`QNU>&ZJYy$)$2%>nf1qlCGcm{Z-impdOsB8?AL_;-Gdy?36N=
z%V$AJ{@?DokyyW%*mtTP`{ifBV>=3tu)go8@0W#_XE>-=F*M*!_2*{C8LU%~brKYf<8XChlY7{e$|GgHuV0
zA)rLg5LEzg@o}9PJNaxi_VYalukz|5&5krzt+vXmJ~yw8L_9gNOP3i+I(P*6YQ
z5Fk$0_fDQVu9|?~n=q0*HJ&@{;ntBVv^Y7LW!0Z1b?R=5QP-JOeCo-e_$PeW;clyx
zq|a6cQ}Q0Daj`4fRC_eoDk%s^9i{4s#T#Dvp|u1%wd+j=>5eNsiqpd=)REGhGidGU
zUxuAc@K3xkON<1#`Gc}SnCm$&HX%|Apy||ZK;fhqtaG69PNNo-5J4p4o{X8~eZ
z`h6hJ_yS0oxCJ0pCsBY=%>u;A$z4hS-W;5-$Ueoi2R4sRK>V80K(4(GrV>;7d0!bS
zhnDFC{d6f2cYt9rI2{)I-@^jW@hA
zJA9+qYj4>?%8L=B>vBAdt8+E5_Rj~wk
zcN~6dOn}n10M9=Gv+%X$A%50C|MJYK8Jbc6W{f!8nuU1>XsJW@k&`>d7+o_j{p63n
zKg>6aK3j8k>{Lz?lo)qD+-Oye^uWNo=XxT2oxPJcfQ9B-HvtkhOI)3iz4@Nh-lJ1@
zE_M#$k=be;>v6j5mT%DFT3(-yUG%T9d-&<$;4=*J>#Y^=XkmyA3(VYGuiZu_NeMt>
zF(Ttxkoe5)J{DlrEC6yqhxCfKI1_3ImvJsY>xc@-=9PUa#bY6fM;B|M>6(5|6N~Kh
z@YA155TFJ_3%*8ERF>g&5QB_4BObPXZz1({!|F%2V^5g
zfEO?$n_2@TLAnrMe@}@)_J1s>7w0cer5?_(0^`?V`Rf@E!vFIbfmeG?yz$fQP5_SO
z-&Rf9eQyrLf4@%t-=X`D9r6F+vf0_cbqsRfopynFg2w*pU8c=fL6~~6xp;WpoJ)7l
z1QV!AQm{eTFsLW2$*t-82Kq6O8c2=TA1)6dU%(@fPmgK_aI0p&_mD5VAi_HQqR%R|
z7A-PjW6`~fTFJAEIx>O~?zrFttFjZX-WNSFj8Fp=L5ltA?O@{F@UZaO@ydwl+&b5S
zzEimJ4`1Qsa!ty8hJ{H4l)mf(RTLKhLe_$8R`Ktb`ar!-j|OoA_@`xYiE34e?f!*e
zKvG1UM+(&Ruy)`CrRK${U20~%;!-y-7LrKcF_0B$VRt$*HFBEp(_^bjdU*Cmr|@6x
z)xwf@v~8#YC6YAri$~oqQZ)USr%s)+NqynX*3uS#Dpfi|S|wUmNzke?g0w!<0~@0A
zvjQzC@KG!Jg(PD#o7|0?KT1&yPRK!OzfVThG{P+kC5OMA$X?B`W
z4@Jz1gZvmCWRjBhQsIc6e|vMsYxPa{4ngkWEueY8LsIDc%sWMKO*gtGkn=pg!2iB&
zgY8%QYsUZyv^k?59VXS`%O(7Trqndhgxs^$B2?$-JEoDc8RIc|oHQ?PalCx0&jhlA
z#D$(+SrqwVgxjdS0;q{7%@M98t~hTx;tz{+q1Ln%Unyt1t
zcRA%092EJA}U!aa|
zs1yXk${?-K+{TAgG`1yx&lx|uypUkMm0dQN{^L~K9;mvM8D6xJDd+}%`U$@Vj!j!^1$4R~$d-t^W
zYmtr|C$s@#sVpKX4E*`wsKHcI6$=%4nI^R_2J}tb#>CLfQ)B}&xn`ZJt5KHXbyvx4
zza9Zvj4u0a@{)W7!a_W6iZGMW0PXHB9RPwozH)hblFgvM^m?QMO@4T<%RlnOg&Kq$
z{5o~V`FtkNGsR1SO6}2?n9ExY^gHj16p!A*y`8B1OKJZ5VccAaB2o0UFyyRQZ@vg?
zJzLL;PW|l;!L3iTGnc)o(ZNR3@=E`_K6_#KL
zqVN_+J9*F4#lOS&!O(!+KQa2}0jXnu$`Sb2ZcB5@0%&%tl^uJ7tO_(vR^O|IE-B({
zDT##|j?65;DqsO9b0RUr`3|5gPDO0Z_yOIs
zDwQ>8{El0IcTP1;6XYz}`_@}yM#B@i^g@=g-@LTtx;yWU7hZ12b8_<~E;E?-0XTr-
z2P4?7kMb&CkALRiqgfRhGi%_MxwByP4+sYs2>paUsylQCG|-MgGjD{A)px!O?fy+s
zpw#R@jAihgrJscyU7u*##`}%j4YVU4)RU%kd4GkW%SKa$%qsPxO-wtgVMnr+XQTe{v0PlRVh1hR*ahj2P$;e_?)~
zHz;^d@R`spgM>a}Eb(mq=QX{Ivj>9{onG`K8!)P{3A&`KNFDSpN6uH+9cl+?`a7fO
zZp@zc?QY8*TJ~?r7teblROm>&BHM{tktv$*#F)-|t|7S(yYBhs8cB^pTz&_{_RGzV
zE4@jsk~=Yi$Iu9YV?2~4j@LGgA2~Sa9N(9oo?N^UZ_UdHO>w55HX<(Fvz}IKg_a2%
z>qXp_<)wrF#S&NnERAH0;W#@58cFH6LSU}0Rq+s&n!Yzz;+TeX6feNjPHWHGoUOcT
zJ#7p(p0(##srYE-RPpzW&5K%Llp!Bxp^!W7bEfl1cdWLJ5tq}@nLZ_zx-@2l9$FPw(8Q^HA3a|Q-h^JV7+hT
zI=GR|O90(L7kpXyMvsh~5~+>6y$pY);+y=3K-VZ@%hC2su#7)YpiIDyUy^~C8}1fn
z9|yErQg)RHc5jKUK_S~>!0ROZuu}Whi8O+Um7x%3)5YHTgu5&)6@`VV@`5~Y4YP%K
zDwWXuH~N^tuN7b8x4{EX@hlgc-DJmf!og6w(`=m%Bq;QM*yN2#f)4Znwrs6r9kRS)
z79Trr
zO_OkET}N$@cp9L;)ZS+}pIDh!b9vMU5QV7*S^DsW)C&;i+(%3cgI8|;4F>_EbPh3n
zpSZN}-A1-!25+VaV&1l}v}BNM8W)igS
zgZM_7vmzdbd^f#QfPB7Dgaur1QcOnkAO-=~*d>GGPPJuQ$0NRrdk0QKv%-gZ$N_P($Ow*NeLfnV3ZrK`CvPP&x
zKDG|C`1nO$)Gn*ybM>VQ0S;1$M(cay#XzHG`9x&wtb+}164~P6Q7@6*I<nIAHLi
zj|X?Wh00Zmq22!YZ`7}cZ1k+WnyGpxZre-no%fjHH_@w-y7~cCc#y
z+kBUWc*pkX)f{3@HJ|_2vbv)+?(;B
zM;w-k2N|n)C2u6*jp;j?&FSZ