Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-13 22:18:36 +08:00)
Update TensorRT-LLM (#2215)
* Update TensorRT-LLM

Co-authored-by: Sherlock Xu <65327072+Sherlock113@users.noreply.github.com>
This commit is contained in: parent 78f5c2936b, commit 31ac30e928
.gitmodules (vendored, 3 changes)
@@ -11,3 +11,6 @@
 [submodule "3rdparty/NVTX"]
 	path = 3rdparty/NVTX
 	url = https://github.com/NVIDIA/NVTX.git
+[submodule "3rdparty/ucxx"]
+	path = 3rdparty/ucxx
+	url = https://github.com/GuanLuo/ucxx.git
3rdparty/ucxx (new vendored submodule, 1 change)
@@ -0,0 +1 @@
+Subproject commit b99181779672965c6f325a95a29eb433b6e9cbbd
@@ -17,6 +17,9 @@ TensorRT-LLM
 <div align="left">
 
 ## Latest News
+* [2024/09/04] 🏎️🏎️🏎️ Best Practices for Tuning TensorRT-LLM for Optimal Serving with BentoML
+[➡️ link](https://www.bentoml.com/blog/tuning-tensor-rt-llm-for-optimal-serving-with-bentoml)
+
 * [2024/08/20] 🏎️SDXL with #TensorRT Model Optimizer ⏱️⚡ 🏁 cache diffusion 🏁 quantization aware training 🏁 QLoRA 🏁 #Python 3.12
 [➡️ link](https://developer.nvidia.com/blog/nvidia-tensorrt-model-optimizer-v0-15-boosts-inference-performance-and-expands-model-support/)
 
@@ -160,6 +160,10 @@ or manually set a max sequence length that you plan to run with specifically:
 trtllm-bench --model meta-llama/Llama-2-7b-hf build --max_seq_len 256 --quantization FP8
 ```
 
+> [!NOTE] `trtllm-bench build` reproduces benchmark engines for performance study. These engine
+configurations are not guaranteed to be optimal for all cases and should be viewed as reproducers
+for the benchmark data we provide on our [Performance Overview](../docs/source/performance/perf-overview.md).
+
 Looking a little closer, the `build` sub-command
 will perform a lookup and build an engine using those reference settings. The
 look up table directly corresponds to the performance table found in our
@@ -157,7 +157,7 @@ struct BenchmarkParams
     int randomSeed = 430;
     std::optional<std::vector<int>> maxAttentionWindowVec{std::nullopt};
    std::optional<int> sinkTokenLength{std::nullopt};
-    bool multiBlockMode{false};
+    bool multiBlockMode{true};
     bool enableContextFMHAFP32Acc{false};
 
     // lora / peft params
@@ -1943,7 +1943,7 @@ int main(int argc, char* argv[])
 
     options.add_options()("multi_block_mode",
         "Distribute the work across multiple CUDA thread-blocks on the GPU for masked MHA kernel",
-        cxxopts::value<bool>()->default_value("false"));
+        cxxopts::value<bool>()->default_value("true"));
     options.add_options()(
         "encoder_engine_dir", "Directory that store the engines of the encoder models.", cxxopts::value<std::string>());
 
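The default flip appears both in `BenchmarkParams` and in the CLI option above. As a minimal standalone sketch (a toy program, not part of the benchmark itself) of how a cxxopts bool option with `default_value("true")` behaves:

```cpp
#include <cxxopts.hpp>
#include <iostream>

int main(int argc, char* argv[])
{
    cxxopts::Options options("demo", "Illustrates a bool option that defaults to true");
    options.add_options()("multi_block_mode", "Enable multi-block mode",
        cxxopts::value<bool>()->default_value("true"));
    auto result = options.parse(argc, argv);

    // Prints 1 when the flag is left at its default; pass
    // --multi_block_mode=false to opt back into the old behavior.
    std::cout << result["multi_block_mode"].as<bool>() << "\n";
    return 0;
}
```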
@@ -381,7 +381,7 @@ endif()
 # set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -G")
 
 set(CMAKE_CXX_FLAGS
-    "${CMAKE_CXX_FLAGS} -DBUILD_SYSTEM=cmake_oss -DENABLE_MULTI_DEVICE=${ENABLE_MULTI_DEVICE}"
+    "${CMAKE_CXX_FLAGS} -DBUILD_SYSTEM=cmake_oss -DENABLE_MULTI_DEVICE=${ENABLE_MULTI_DEVICE} -DENABLE_UCX=${ENABLE_UCX}"
 )
 
 # Fix linking issue with TRT 10, the detailed description about `--mcmodel` can
@@ -538,6 +538,30 @@ elseif(NOT WIN32)
   message(STATUS "Build without PyTorch, USE_CXX11_ABI=${USE_CXX11_ABI}")
 endif()
 
+# Defer UCX/UCXX setup until after USE_CXX11_ABI is well defined, as UCXX will
+# need to be built to have aligned symbols
+set_ifndef(ENABLE_UCX 0)
+if(ENABLE_UCX)
+  # Only enable UCX related features if the system has UCX library
+  find_package(ucx)
+  if(NOT ${ucx_FOUND})
+    set(ENABLE_UCX 0)
+  else()
+    # installing ucxx via add_subdirectory results in strange cudart linking
+    # error, thus using their installation script to isolate the installation
+    # process until the issue is understood. And always trigger the build so
+    # that change in USE_CXX11_ABI will not be ignored.
+    execute_process(
+      COMMAND
+        ${3RDPARTY_DIR}/ucxx/build.sh libucxx -n
+        --cmake-args=\"-DBUILD_SHARED_LIBS=OFF
+        -DCMAKE_CXX_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=${USE_CXX11_ABI}\"
+      COMMAND_ECHO STDOUT)
+    find_package(ucxx REQUIRED PATHS ${3RDPARTY_DIR}/ucxx/cpp/build
+                 NO_DEFAULT_PATH)
+  endif()
+endif()
+
 file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" VERSION_STRINGS
      REGEX "#define NV_TENSORRT_.*")
 foreach(TYPE MAJOR MINOR PATCH BUILD)
@@ -41,6 +41,7 @@ auto constexpr kDraftInputIdsTensorName = "draft_input_ids";
 auto constexpr kDraftLogitsTensorName = "draft_logits";
 auto constexpr kMaxNewTokensTensorName = "request_output_len";
 auto constexpr kBeamWidthTensorName = "beam_width";
+auto constexpr kNumReturnSequencesTensorName = "num_return_sequences";
 auto constexpr kEndIdTensorName = "end_id";
 auto constexpr kPadIdTensorName = "pad_id";
 auto constexpr kBadWordsListTensorName = "bad_words_list";
@@ -194,6 +195,7 @@ public:
         inference_request::kDraftLogitsTensorName,
         inference_request::kMaxNewTokensTensorName,
         inference_request::kBeamWidthTensorName,
+        inference_request::kNumReturnSequencesTensorName,
         inference_request::kEndIdTensorName,
         inference_request::kPadIdTensorName,
         inference_request::kBadWordsListTensorName,
@@ -263,6 +265,7 @@ public:
     TENSOR_GETTER_SETTER(DraftLogits, inference_request::kDraftLogitsTensorName)
     TENSOR_GETTER_SETTER(MaxNewTokens, inference_request::kMaxNewTokensTensorName)
     TENSOR_GETTER_SETTER(BeamWidth, inference_request::kBeamWidthTensorName)
+    TENSOR_GETTER_SETTER(NumReturnSequences, inference_request::kNumReturnSequencesTensorName)
     TENSOR_GETTER_SETTER(EndId, inference_request::kEndIdTensorName)
     TENSOR_GETTER_SETTER(PadId, inference_request::kPadIdTensorName)
     TENSOR_GETTER_SETTER(BadWordsList, inference_request::kBadWordsListTensorName)
@@ -85,6 +85,7 @@ public:
     using TensorPtr = TTensor;
     using LogitsPostProcessor = std::function<void(
         RequestIdType, TensorPtr&, BeamTokens const&, TStream const&, std::optional<RequestIdType>)>;
+    using RequestPtr = std::shared_ptr<GenericLlmRequest>;
 
     GenericLlmRequest(RequestIdType requestId, SizeType32 maxNewTokens, std::shared_ptr<VecTokens> inputTokens,
         runtime::SamplingConfig const& samplingConfig, bool isStreaming, std::optional<SizeType32> endId = std::nullopt,
@@ -107,7 +108,8 @@ public:
         std::optional<TensorPtr> encoderInputFeatures = std::nullopt,
         std::optional<SizeType32> encoderOutputLength = std::nullopt,
         LlmRequestType llmRequestType = LlmRequestType::LLMREQUEST_TYPE_CONTEXT_AND_GENERATION,
-        std::optional<std::shared_ptr<VecTokenExtraIds>> inputTokenExtraIds = std::nullopt)
+        std::optional<std::shared_ptr<VecTokenExtraIds>> inputTokenExtraIds = std::nullopt,
+        SizeType32 numReturnSequences = 1)
         : mRequestId(requestId)
         , mPromptLen(inputTokens->size())
         , mMaxNewTokens(maxNewTokens)
@@ -152,11 +154,14 @@ public:
         , mEncoderOutputLength(encoderOutputLength)
         , mLlmRequestType(llmRequestType)
         , mInputTokenExtraIds(std::move(inputTokenExtraIds))
+        , mNumReturnSequences(numReturnSequences)
+        , mSequenceIndex(0)
     {
         if (mEncoderTokens.has_value() || encoderInputFeatures.has_value())
         {
             mState = REQUEST_STATE_ENCODER_INIT;
         }
 
         initialize(*inputTokens, returnLogProbs);
     }
@@ -202,6 +207,8 @@ public:
         , mEncoderOutputLength(req.getEncoderOutputLength())
         , mContextPhaseParams(req.getContextPhaseParams())
         , mInputTokenExtraIds(std::nullopt)
+        , mNumReturnSequences(req.getNumReturnSequences())
+        , mSequenceIndex(0)
     {
         if (req.getRequestType() == executor::RequestType::REQUEST_TYPE_GENERATION_ONLY)
         {
@@ -217,6 +224,7 @@ public:
                 "length).");
             mReturnAllGeneratedTokens = true;
         }
+
         if (mIsStreaming && mSamplingConfig.beamWidth > 1 && mReturnGenerationLogits == true)
         {
             TLLM_LOG_WARNING(
@@ -276,13 +284,15 @@ public:
             mLoraTaskId = loraConfig->getTaskId();
             if (loraConfig.value().getWeights())
             {
-                mLoraWeights = executor::detail::toITensor(loraConfig.value().getWeights().value());
+                mLoraWeights = tensorrt_llm::runtime::ITensor::view(
+                    executor::detail::toITensor(loraConfig.value().getWeights().value()));
                 mLoraWeights.value()->unsqueeze(0);
             }
 
             if (loraConfig.value().getConfig())
             {
-                mLoraConfig = executor::detail::toITensor(loraConfig.value().getConfig().value());
+                mLoraConfig = tensorrt_llm::runtime::ITensor::view(
+                    executor::detail::toITensor(loraConfig.value().getConfig().value()));
                 mLoraConfig.value()->unsqueeze(0);
             }
         }
@@ -429,6 +439,20 @@ public:
         return mTokens.at(beam).size() - mNumPreDecodedTokens[beam];
     }
 
+    /// @brief Get number of return sequences for this req.
+    /// @return The number of sequences to return.
+    [[nodiscard]] SizeType32 getNumReturnSequences() const
+    {
+        return mNumReturnSequences;
+    }
+
+    /// @brief Get child requests spawned by this req.
+    /// @return A vector of child requests.
+    [[nodiscard]] std::vector<RequestPtr> const& getChildRequests() const
+    {
+        return mChildRequests;
+    }
+
     /// @brief Get max number of tokens across all beams
     /// @return The number of tokens
     [[nodiscard]] SizeType32 getMaxBeamNumTokens() const
@@ -618,6 +642,25 @@ public:
         }
     }
 
+    /// @brief Sets the number of return sequences.
+    /// @param numReturnSequences The number of return sequences.
+    void setNumReturnSequences(SizeType32 const& numReturnSequences)
+    {
+        TLLM_CHECK_WITH_INFO(!isChild(), "A child request cannot change numReturnSequences.");
+        TLLM_CHECK_WITH_INFO(
+            numReturnSequences > 0, "numReturnSequences should be a positive integer, got %d.", numReturnSequences);
+        TLLM_CHECK_WITH_INFO(mChildRequests.size() <= static_cast<size_t>(numReturnSequences),
+            "Cannot set numReturnSequences %d smaller than the number %ld of child requests that have already created.",
+            numReturnSequences, mChildRequests.size());
+        mNumReturnSequences = numReturnSequences;
+        mSequenceFinalVec->resize(mNumReturnSequences);
+    }
+
+    [[nodiscard]] bool constexpr isChild() const noexcept
+    {
+        return mSequenceIndex > 0;
+    }
+
     /// @brief Return a vector of the last-generated tokens of shape [num_beams]
     [[nodiscard]] VecTokens const& getLastTokens()
     {
@@ -886,6 +929,11 @@ public:
         mEncoderOutputHost = std::move(encoderOutputHost);
     }
 
+    void setEncoderOutput(TensorPtr encoderOutput)
+    {
+        mEncoderOutput = std::move(encoderOutput);
+    }
+
     void allocEncoderOutputHost(SizeType32 encoderHiddenSize, nvinfer1::DataType dataType)
     {
         mEncoderOutputHost = runtime::BufferManager::pinned(
@@ -1204,7 +1252,14 @@ public:
         TLLM_LOG_DEBUG("Creating response for request %lu", mRequestId);
 
         executor::Result result;
-        result.isFinal = isGenerationCompleteState() || isDisaggContextTransmissionState();
+        result.sequenceIndex = mSequenceIndex;
+
+        result.isSequenceFinal = isGenerationCompleteState() || isDisaggContextTransmissionState();
+        mSequenceFinalVec->at(mSequenceIndex) = result.isSequenceFinal;
+
+        result.isFinal = std::all_of(mSequenceFinalVec->begin(), mSequenceFinalVec->end(),
+            [](bool isSequenceFinal) { return isSequenceFinal; });
 
         auto const nbBeams = mSamplingConfig.beamWidth;
         auto const maxNbTokens = getMaxBeamNumTokens();
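The new `isFinal` aggregation is worth spelling out: a request is final only once every sibling sequence has posted its own final result into the shared completion vector. A minimal standalone restatement of that predicate:

```cpp
#include <algorithm>
#include <vector>

// Mirrors the std::all_of reduction in createResponse(): true only when
// every entry of the shared per-sequence completion vector is set.
bool requestIsFinal(std::vector<bool> const& sequenceFinal)
{
    return std::all_of(sequenceFinal.begin(), sequenceFinal.end(),
        [](bool isSequenceFinal) { return isSequenceFinal; });
}
```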
@@ -1295,7 +1350,9 @@ public:
             // Update position of last sent response
             setMaxSentTokenLen(maxNbTokens);
 
-            auto response = executor::Response(mRequestId, std::move(result));
+            auto requestId = isChild() ? mParentRequestId : mRequestId;
+            auto response = executor::Response(requestId, std::move(result));
 
             return response;
         }
     }
@@ -1413,6 +1470,12 @@ protected:
     // TODO: add real extra id for encoder tokens
     std::optional<std::shared_ptr<VecUniqueTokens>> mEncoderUniqueTokens;
 
+    SizeType32 mNumReturnSequences;
+    SizeType32 mSequenceIndex;
+    std::vector<RequestPtr> mChildRequests;
+    RequestIdType mParentRequestId;
+    std::shared_ptr<std::vector<bool>> mSequenceFinalVec; // Indicators whether each sibling completes generation.
+
 private:
     void initialize(VecTokens const& inputTokens, bool outputLogProbs)
     {
@@ -1475,6 +1538,12 @@ private:
         }
 
         setReturnLogProbs(outputLogProbs);
+
+        if (!isChild())
+        {
+            // Initialize result states unless it is a child and a child request should share parent's one.
+            mSequenceFinalVec = std::make_shared<std::vector<bool>>(getNumReturnSequences(), false);
+        }
     }
 
     TensorPtr createListTensor(std::list<VecTokens> const& wordsList)
@@ -1540,7 +1609,8 @@ public:
         std::optional<TensorPtr> encoderInputFeatures = std::nullopt,
         std::optional<SizeType32> encoderOutputLength = std::nullopt,
         LlmRequestType llmRequestType = LlmRequestType::LLMREQUEST_TYPE_CONTEXT_AND_GENERATION,
-        std::optional<std::shared_ptr<VecTokenExtraIds>> inputTokenExtraIds = std::nullopt)
+        std::optional<std::shared_ptr<VecTokenExtraIds>> inputTokenExtraIds = std::nullopt,
+        SizeType32 numReturnSequences = 1)
         : Base(requestId, maxNewTokens, std::move(inputTokens), samplingConfig, isStreaming, endId, padId,
             std::move(embeddingBias), std::move(badWordsList), std::move(stopWordsList), std::move(positionIds),
             std::move(promptEmbeddingTable), promptVocabSize, loraTaskId, std::move(loraWeights), std::move(loraConfig),
@@ -1548,18 +1618,49 @@ public:
             std::move(draftTokens), std::move(draftLogits), excludeInputFromOutput, std::move(logitsPostProcessor),
             applyLogitsPostProcessorBatched, std::move(encoderInputTokens), returnEncoderOutput, clientId, priority,
             std::move(encoderInputFeatures), std::move(encoderOutputLength), llmRequestType,
-            std::move(inputTokenExtraIds))
+            std::move(inputTokenExtraIds), numReturnSequences)
     {
     }
 
-    LlmRequest(RequestIdType requestId, executor::Request const& Request,
+    LlmRequest(RequestIdType requestId, executor::Request const& request,
         std::optional<Base::LogitsPostProcessor> logitsPostProcessor = std::nullopt,
         bool applyLogitsPostProcessorBatched = false)
-        : Base(requestId, Request)
+        : Base(requestId, request)
     {
         mLogitsPostProcessor = std::move(logitsPostProcessor);
         mApplyLogitsPostProcessorBatched = applyLogitsPostProcessorBatched;
-        mLookaheadConfig = Request.getLookaheadConfig();
+        mLookaheadConfig = request.getLookaheadConfig();
     }
 
+    std::shared_ptr<LlmRequest> createChildRequest(RequestIdType requestId)
+    {
+        TLLM_CHECK_WITH_INFO(!isChild(), "A child request cannot create its own child.");
+        TLLM_CHECK_WITH_INFO(mChildRequests.size() + 1 < static_cast<size_t>(getNumReturnSequences()),
+            "Cannot create child requests more than the number of return sequences (%d)", getNumReturnSequences());
+        auto childReq = std::make_shared<LlmRequest>(*this);
+        childReq->mRequestId = requestId;
+        childReq->mSequenceIndex = mChildRequests.size() + 1;
+        childReq->mParentRequestId = this->mRequestId;
+        childReq->mSequenceFinalVec = this->mSequenceFinalVec;
+        childReq->mSeqSlot.reset();
+
+        // To ensure different randomness across children, assign a unique random seed to each child
+        // by adding its sequence index to the base seed. If no seed is provided, the parent's seed defaults to 0.
+        using RandomSeedType = tensorrt_llm::executor::RandomSeedType;
+        if (childReq->mSamplingConfig.randomSeed.has_value())
+        {
+            childReq->mSamplingConfig.randomSeed->at(0) += static_cast<RandomSeedType>(childReq->mSequenceIndex);
+        }
+        else
+        {
+            RandomSeedType defaultSeed{0};
+            mSamplingConfig.randomSeed = std::vector<RandomSeedType>(1, defaultSeed);
+            childReq->mSamplingConfig.randomSeed
+                = std::vector<RandomSeedType>(1, defaultSeed + static_cast<RandomSeedType>(childReq->mSequenceIndex));
+        }
+
+        mChildRequests.push_back(childReq);
+        return childReq;
+    }
+
     void movePromptEmbeddingTableToGpu(runtime::BufferManager const& manager)
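The seed rule in `createChildRequest` is simple arithmetic: each child samples with the base seed plus its 1-based sequence index, and a parent without an explicit seed defaults to 0. A standalone sketch of that rule (the function name `childSeeds` is illustrative, not part of the API):

```cpp
#include <cstdint>
#include <vector>

// Illustrative model of the child-seed rule: child i (sequence index i >= 1)
// samples with baseSeed + i, while the parent keeps baseSeed itself.
std::vector<std::uint64_t> childSeeds(std::uint64_t baseSeed, int numReturnSequences)
{
    std::vector<std::uint64_t> seeds;
    for (int sequenceIndex = 1; sequenceIndex < numReturnSequences; ++sequenceIndex)
    {
        seeds.push_back(baseSeed + static_cast<std::uint64_t>(sequenceIndex));
    }
    return seeds;
}
```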
@@ -32,7 +32,7 @@ using runtime::SizeType32;
 struct PeftCacheManagerConfig
 {
 
-    static float constexpr kDefaultDeviceCachePercent = 0.05;
+    static float constexpr kDefaultDeviceCachePercent = 0.02;
     static size_t constexpr kDefaultHostCacheSize = 1024 * 1024 * 1024;
 
     explicit PeftCacheManagerConfig(SizeType32 numHostModuleLayer = 0, SizeType32 numDeviceModuleLayer = 0,
@@ -78,15 +78,6 @@ public:
     {
     }
 
-    // Copy constructor
-    TrtGptModelOptionalParams(TrtGptModelOptionalParams const& other)
-        : TrtGptModelOptionalParams(other.kvCacheConfig, other.enableTrtOverlap, other.deviceIds,
-            other.normalizeLogProbs, other.enableChunkedContext, other.peftCacheManagerConfig, other.decodingConfig,
-            other.gpuWeightsPercent, other.maxBeamWidth, other.maxBatchSize, other.maxNumTokens, other.schedulerConfig,
-            other.extendedRuntimePerfKnobConfig, other.debugConfig, other.maxSeqIdleMicroseconds)
-    {
-    }
-
     bool operator==(TrtGptModelOptionalParams const& other) const
     {
         return kvCacheConfig == other.kvCacheConfig //
cpp/include/tensorrt_llm/common/utils.h (new file, 38 lines)
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <string>
+
+#ifndef _WIN32
+#include <pthread.h>
+#endif
+
+namespace tensorrt_llm::common
+{
+
+inline bool setThreadName(std::string const& name)
+{
+#ifdef _WIN32
+    return false;
+#else
+    auto const ret = pthread_setname_np(pthread_self(), name.c_str());
+    return !ret;
+#endif
+}
+
+} // namespace tensorrt_llm::common
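A small usage sketch for the new helper; nothing here beyond the header shown above and the standard library:

```cpp
#include <thread>

#include "tensorrt_llm/common/utils.h"

int main()
{
    std::thread worker(
        []
        {
            // On Linux this forwards to pthread_setname_np (names are limited
            // to 15 characters); on Windows the helper is a no-op that
            // returns false.
            tensorrt_llm::common::setThreadName("trtllmWorker");
        });
    worker.join();
    return 0;
}
```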
@@ -24,6 +24,7 @@
 #include <deque>
 #include <filesystem>
+#include <list>
 #include <map>
 #include <memory>
 #include <optional>
 #include <string>
@@ -343,6 +344,7 @@ public:
     /// convolution down-sampling, etc.)
     /// @param type Indicate the request type for disaggregated serving mode.
     /// @param contextPhaseParams Generated token ID from context only executor.
+    /// @param numReturnSequences The number of returning sequences.
     Request(VecTokens inputTokenIds, SizeType32 maxTokens, bool streaming = false,
         SamplingConfig const& samplingConfig = SamplingConfig(), OutputConfig const& outputConfig = OutputConfig(),
         std::optional<SizeType32> const& endId = std::nullopt, std::optional<SizeType32> const& padId = std::nullopt,
@@ -360,7 +362,7 @@ public:
         RequestType type = RequestType::REQUEST_TYPE_CONTEXT_AND_GENERATION,
         std::optional<ContextPhaseParams> contextPhaseParams = std::nullopt,
         std::optional<Tensor> encoderInputFeatures = std::nullopt,
-        std::optional<SizeType32> encoderOutputLength = std::nullopt);
+        std::optional<SizeType32> encoderOutputLength = std::nullopt, SizeType32 numReturnSequences = 1);
 
     /// @brief This logits postprocessor name will dispatch to the batched logits postprocessor
     static auto constexpr kBatchedPostProcessorName = "batched";
@@ -396,6 +398,7 @@ public:
     [[nodiscard]] std::optional<Tensor> getEncoderInputFeatures() const;
     [[nodiscard]] std::optional<SizeType32> getEncoderOutputLength() const;
     [[nodiscard]] RequestType getRequestType() const;
+    [[nodiscard]] SizeType32 getNumReturnSequences() const;
 
     void setStreaming(bool streaming);
     void setSamplingConfig(SamplingConfig const& config);
@@ -419,6 +422,7 @@ public:
     void setContextPhaseParams(ContextPhaseParams contextPhaseParams);
     void setEncoderInputFeatures(Tensor encoderInputFeatures);
     void setEncoderOutputLength(SizeType32 encoderOutputLength);
+    void setNumReturnSequences(SizeType32 numReturnSequences);
 
 private:
     friend class Serialization;
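A hedged sketch of requesting multiple sequences through the new setter; only the constructor's two required arguments are used, the token IDs are placeholders, and the include path is assumed:

```cpp
#include "tensorrt_llm/executor/executor.h" // assumed include path

namespace texec = tensorrt_llm::executor;

texec::Request makeRequest()
{
    // inputTokenIds and maxTokens are illustrative values only.
    texec::Request request(/*inputTokenIds=*/{1, 2, 3}, /*maxTokens=*/10);
    request.setNumReturnSequences(4); // ask for four independent sequences
    return request;
}
```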
@@ -461,6 +465,12 @@ struct Result
 
     /// @brief The decoding iterations it takes.
     SizeType32 decodingIter{0};
+
+    /// @brief The index of the output sequence where 0 <= sequenceIndex < numReturnSequences
+    SizeType32 sequenceIndex{0};
+
+    /// @brief Indicates if this is the final result for a given sequence in the request
+    bool isSequenceFinal;
 };
 
 /// @brief Class that holds either an error or a result
@@ -583,7 +593,7 @@ private:
 class ExtendedRuntimePerfKnobConfig
 {
 public:
-    explicit ExtendedRuntimePerfKnobConfig(bool multiBlockMode = false, bool enableContextFMHAFP32Acc = false);
+    explicit ExtendedRuntimePerfKnobConfig(bool multiBlockMode = true, bool enableContextFMHAFP32Acc = false);
 
     bool operator==(ExtendedRuntimePerfKnobConfig const& other) const
     {
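Since the new default silently changes behavior for callers that default-construct the config, a short hedged sketch (the class and signature are as declared above; the include path is assumed):

```cpp
#include "tensorrt_llm/executor/executor.h" // assumed include path

using tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig;

void configure()
{
    ExtendedRuntimePerfKnobConfig byDefault{};  // multiBlockMode is now true
    ExtendedRuntimePerfKnobConfig optOut{/*multiBlockMode=*/false}; // previous behavior
}
```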
@@ -612,27 +622,33 @@ class DebugConfig
     using StringVec = std::vector<std::string>;
 
 public:
-    explicit DebugConfig(bool dumpInputTensors = false, bool dumpOuputTensors = false, StringVec debugTensorNames = {});
+    explicit DebugConfig(bool debugInputTensors = false, bool debugOutputTensors = false,
+        StringVec debugTensorNames = {}, SizeType32 debugTensorsMaxIterations = 0);
 
     bool operator==(DebugConfig const& other) const;
 
-    [[nodiscard]] bool getDumpInputTensors() const;
-    [[nodiscard]] bool getDumpOutputTensors() const;
+    [[nodiscard]] bool getDebugInputTensors() const;
+    [[nodiscard]] bool getDebugOutputTensors() const;
     [[nodiscard]] StringVec const& getDebugTensorNames() const;
+    [[nodiscard]] SizeType32 getDebugTensorsMaxIterations() const;
 
-    void setDumpInputTensors(bool dumpInputTensors);
-    void setDumpOuputTensors(bool dumpOuputTensors);
+    void setDebugInputTensors(bool debugInputTensors);
+    void setDebugOutputTensors(bool debugOutputTensors);
     void setDebugTensorNames(StringVec const& debugTensorNames);
+    void setDebugTensorsMaxIterations(SizeType32 debugTensorsMaxIterations);
 
 private:
     friend class Serialization;
 
-    /// @brief If true, dump all input tensors.
-    bool mDumpInputTensors;
-    /// @brief If true, dump all output tensors.
-    bool mDumpOuputTensors;
-    /// @brief If not empty, only dump tensors in this list.
+    /// @brief If true, debug all input tensors.
+    bool mDebugInputTensors;
+    /// @brief If true, debug all output tensors.
+    bool mDebugOutputTensors;
+    /// @brief If not empty, only debug tensors in this list.
     StringVec mDebugTensorNames;
+    /// @brief If > 0, provide debug tensors for at most debugTensorsMaxIterations past iterations,
+    /// else dump them to files.
+    SizeType32 mDebugTensorsMaxIterations;
 };
 
 SizeType32 const kDefaultIterStatsMaxIterations = 1000;
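A hedged construction example matching the renamed signature (tensor name and iteration count are placeholders; the include path is assumed):

```cpp
#include "tensorrt_llm/executor/executor.h" // assumed include path

using tensorrt_llm::executor::DebugConfig;

DebugConfig makeDebugConfig()
{
    // Keep output tensors named "logits" for the last 5 iterations in memory
    // instead of dumping every tensor to files (the behavior when
    // debugTensorsMaxIterations is 0).
    return DebugConfig(/*debugInputTensors=*/false, /*debugOutputTensors=*/true,
        /*debugTensorNames=*/{"logits"}, /*debugTensorsMaxIterations=*/5);
}
```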
@@ -960,7 +976,8 @@ public:
         ModelType modelType, ExecutorConfig const& executorConfig);
 
     Executor(BufferView const& engineBuffer, std::string const& jsonConfigStr, ModelType modelType,
-        ExecutorConfig const& executorConfig);
+        ExecutorConfig const& executorConfig,
+        std::optional<std::map<std::string, Tensor>> const& managedWeights = std::nullopt);
 
     Executor(BufferView const& encoderEngineBuffer, std::string const& encoderJsonConfigStr,
         BufferView const& decoderEngineBuffer, std::string const& decoderJsonConfigStr, ModelType modelType,
@@ -1021,20 +1038,25 @@ public:
     /// @param id The request id for which to cancel the response
     void cancelRequest(IdType requestId);
 
-    /// @brief Signals the server to shutdown
-    /// This call is blocking. Only returns when all requests have terminated or timeout has been reached
+    /// @brief Signals the server to shutdown.
+    /// @details This call is blocking. Only returns when all requests have terminated or timeout has been reached
     void shutdown();
 
-    /// @brief Returns the per-iterations statistics computed since last call to getLatestIterationStats
-    /// Contains at most iterStatsMaxIterations iterations
+    /// @brief Returns the per-iterations statistics computed since last call to getLatestIterationStats.
+    /// Contains at most iterStatsMaxIterations iterations.
     /// @return Iteration stats
     std::deque<IterationStats> getLatestIterationStats();
 
-    /// @brief Returns the request stats of each iteration computed since last call to getLatestRequestStats
-    /// Contains at most requestStatsMaxIterations iterations
+    /// @brief Returns the request stats of each iteration computed since last call to getLatestRequestStats.
+    /// Contains at most requestStatsMaxIterations iterations.
     /// @return Request stats grouped by iterations
     std::deque<RequestStatsPerIteration> getLatestRequestStats();
 
+    /// @brief Returns the debug tensors of each iteration computed since last call to getLatestDebugTensors.
+    /// Contains at most debugTensorsMaxIterations iterations.
+    /// @return Request debug tensors grouped by iterations
+    std::deque<DebugTensorsPerIteration> getLatestDebugTensors();
+
     /// @brief Indicates if the current process is allowed to enqueueRequests
     [[nodiscard]] bool canEnqueueRequests() const;
@@ -25,6 +25,13 @@
 namespace tensorrt_llm::executor
 {
 
+namespace kv_cache
+{
+class CommState;
+class CacheState;
+class SocketState;
+} // namespace kv_cache
+
 class Serialization
 {
 public:
@@ -53,6 +60,21 @@ public:
     static void serialize(LoraConfig const& config, std::ostream& os);
     [[nodiscard]] static size_t serializedSize(LoraConfig const& config);
 
+    // CommState
+    [[nodiscard]] static kv_cache::CommState deserializeCommState(std::istream& is);
+    static void serialize(kv_cache::CommState const& state, std::ostream& os);
+    [[nodiscard]] static size_t serializedSize(kv_cache::CommState const& state);
+
+    // SocketState
+    [[nodiscard]] static kv_cache::SocketState deserializeSocketState(std::istream& is);
+    static void serialize(kv_cache::SocketState const& state, std::ostream& os);
+    [[nodiscard]] static size_t serializedSize(kv_cache::SocketState const& state);
+
+    // CacheState
+    [[nodiscard]] static kv_cache::CacheState deserializeCacheState(std::istream& is);
+    static void serialize(kv_cache::CacheState const& state, std::ostream& os);
+    [[nodiscard]] static size_t serializedSize(kv_cache::CacheState const& state);
+
     // ContextPhaseState
     [[nodiscard]] static ContextPhaseState deserializeContextPhaseState(std::istream& is);
     static void serialize(ContextPhaseState const& contextPhaseState, std::ostream& os);
@@ -18,6 +18,7 @@
 
 #include <cstdint>
 #include <functional>
+#include <map>
 #include <memory>
 #include <optional>
 #include <string>
@@ -361,6 +362,15 @@ struct RequestStatsPerIteration
     std::vector<RequestStats> requestStats;
 };
 
+/// @brief Struct that holds the debug tensors in an iteration
+struct DebugTensorsPerIteration
+{
+    /// @brief The iteration id for these tensors
+    IterationType iter;
+    /// @brief The debug tensors for this iteration
+    std::map<std::string, Tensor> debugTensors;
+};
+
 /// @brief The reason why the model stopped generating tokens for a request.
 enum class FinishReason
 {
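A standalone sketch of walking the structure above, e.g. over the deque returned by a `getLatestDebugTensors()`-style call; `Tensor` is stubbed and `IterationType` is assumed to be an integer alias, since only the struct layout is shown in this diff:

```cpp
#include <cstddef>
#include <cstdint>
#include <deque>
#include <map>
#include <string>

struct Tensor // stub standing in for tensorrt_llm::executor::Tensor
{
};
using IterationType = std::uint64_t; // assumed alias

struct DebugTensorsPerIteration
{
    IterationType iter;
    std::map<std::string, Tensor> debugTensors;
};

// Count all debug tensors across the buffered iterations.
std::size_t countDebugTensors(std::deque<DebugTensorsPerIteration> const& iterations)
{
    std::size_t count = 0;
    for (auto const& iteration : iterations)
    {
        count += iteration.debugTensors.size();
    }
    return count;
}
```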
@@ -32,7 +32,7 @@ public:
     using BufferPtr = IBuffer::SharedPtr;
 
     // MAX_ALL_REDUCE_BLOCKS for block_barrier, 1 for multi_gpu_barrier
-    size_t static constexpr FLAGS_SIZE = (kernels::MAX_ALL_REDUCE_BLOCKS + 1) * sizeof(uint32_t);
+    size_t static constexpr FLAGS_SIZE = (tensorrt_llm::kernels::MAX_ALL_REDUCE_BLOCKS + 1) * sizeof(uint32_t);
 
     IpcMemory(
         std::size_t bufferSize, BufferManager const& manager, WorldConfig const& worldConfig, bool openIpc = true);
@@ -17,9 +17,11 @@
 #pragma once
 
 #include "tensorrt_llm/common/assert.h"
+#include "tensorrt_llm/executor/tensor.h"
 
 #include <NvInferRuntime.h>
 #include <filesystem>
+#include <map>
 #include <optional>
 
 namespace tensorrt_llm::runtime
@@ -75,6 +77,17 @@ public:
         mEnginePath = std::move(enginePath);
     }
 
+    [[nodiscard]] std::optional<std::map<std::string, tensorrt_llm::executor::Tensor>> const&
+    getManagedWeightsMapOpt() const
+    {
+        return mManagedWeightsMap;
+    }
+
+    void setManagedWeightsMap(std::map<std::string, tensorrt_llm::executor::Tensor> managedWeightsMap)
+    {
+        mManagedWeightsMap = std::move(managedWeightsMap);
+    }
+
     [[nodiscard]] void const* getAddress() const
     {
         TLLM_CHECK(mType == AddressWithSize);
@@ -104,6 +117,7 @@ private:
     };
 
     nvinfer1::IHostMemory const* mEngineBuffer{};
+    std::optional<std::map<std::string, tensorrt_llm::executor::Tensor>> mManagedWeightsMap;
 };
 
 } // namespace tensorrt_llm::runtime
@@ -186,6 +186,16 @@ find_package(Threads REQUIRED)
 target_link_libraries(${BATCH_MANAGER_TARGET} INTERFACE Threads::Threads)
 target_link_libraries(${EXECUTOR_TARGET} INTERFACE Threads::Threads)
 
+if(ENABLE_UCX)
+  find_package(ucx REQUIRED)
+  find_package(ucxx REQUIRED)
+  if(BUILD_BATCH_MANAGER)
+    target_include_directories(
+      ${BATCH_MANAGER_TARGET}
+      PRIVATE $<TARGET_PROPERTY:ucxx::ucxx,INTERFACE_INCLUDE_DIRECTORIES>)
+  endif()
+endif()
+
 if(NOT WIN32)
   if(USE_CXX11_ABI)
     add_custom_command(
@@ -331,6 +341,10 @@ if(ENABLE_MULTI_DEVICE)
   set(TRTLLM_LINK_LIBS ${TRTLLM_LINK_LIBS} ${MPI_C_LIBRARIES} ${NCCL_LIB})
 endif()
 
+if(ENABLE_UCX)
+  set(TRTLLM_LINK_LIBS ${TRTLLM_LINK_LIBS} ucxx::ucxx ucx::ucs)
+endif()
+
 if(NOT WIN32) # Unix-like compilers
   set(UNDEFINED_FLAG "-Wl,--no-undefined")
   set(AS_NEEDED_FLAG "-Wl,--as-needed")
@@ -366,6 +380,9 @@ target_link_libraries(${SHARED_TARGET} PUBLIC ${TRTLLM_LINK_LIBS})
 link_whole_archive(${SHARED_TARGET} ${BATCH_MANAGER_TARGET})
 link_whole_archive(${SHARED_TARGET} ${EXECUTOR_TARGET})
 link_whole_archive(${SHARED_TARGET} ${INTERNAL_CUTLASS_KERNELS_TARGET})
+if(ENABLE_UCX)
+  link_whole_archive(${SHARED_TARGET} ucxx::ucxx)
+endif()
 
 # Cyclic dependency of batch manager on TRT-LLM
 target_link_libraries(${BATCH_MANAGER_TARGET} INTERFACE ${SHARED_TARGET})
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5837518b278aa82cfdaeb3279bfe396de8e0638d31c3447f2eaa7443c22fa3f7
-size 4459926
+oid sha256:1ce35a0714ef753c5328aa982b1fefa58b90994bd87a6739634ec47ec9373f9e
+size 4565552
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff7c6c16dc4755cfcf398d6b92ddd056ad6ae40f1dd830deeccd37ceb795edb6
-size 4567634
+oid sha256:1aa2a508d865410915b9ae4b21b11062a07d6143d19bc84fa53145da7911aa2a
+size 4667530
@@ -1,3 +1,3 @@
-fe9c16bd1eed122234ece7f9afeea382 libtensorrt_llm_batch_manager_static.a
-040e15c175f987c30ebfdbcc8a9c2021 libtensorrt_llm_batch_manager_static.pre_cxx11.a
-052edd4c2bca0a186eed2169a9681d317f67a712 commit
+7a30229eedc22a924052cd5440c5adb4 libtensorrt_llm_batch_manager_static.a
+e46c1e13209f90acdcc8b5f0c9e8a15c libtensorrt_llm_batch_manager_static.pre_cxx11.a
+0e09af04945b8cb0eaf65780a7fa7907a2baee1a commit
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a7311134408b791dbd4f11ad3144f67314a8e6a288d14f12767004d79a82ac2
-size 4318978
+oid sha256:106cab5936a2ac034785050804890aa4deb1436983215439462c244475ebb90c
+size 4422078
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8181b7a634293d981caf0f5011c2619f541c02e818aae8fbff4bf4e72cea6cab
-size 4291752
+oid sha256:cb9ad53702b1fbe66dbb989a50422ab4cf01c53b4943d48296cc96f92742c363
+size 4388652
@@ -1,3 +1,3 @@
-3869999ed0175550deb0d73d0ab0fd08 libtensorrt_llm_batch_manager_static.a
-e6e05e4c36d868dfb1f9c93c77993cbd libtensorrt_llm_batch_manager_static.pre_cxx11.a
-052edd4c2bca0a186eed2169a9681d317f67a712 commit
+251ebd85cff41a2af7f6dcca8489f8fb libtensorrt_llm_batch_manager_static.a
+801b1b6ffd0ab4ec3a66afeb010d97c4 libtensorrt_llm_batch_manager_static.pre_cxx11.a
+0e09af04945b8cb0eaf65780a7fa7907a2baee1a commit
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce95b46f9ae1ff46967984f0c956a0b51bc7c57cebbf0ed6e553729ce84fe8b6
-size 26424318
+oid sha256:291630c536d8262087c9dd5f3bbd4c9b301aea8afbae4af9bc2cdce4db4e8f23
+size 27510016
@@ -1,2 +1,2 @@
-6c8dbccd4cde7ca451e8e99ecb480f55 tensorrt_llm_batch_manager_static.lib
-052edd4c2bca0a186eed2169a9681d317f67a712 commit
+8873c98ec05794c5ebaf05c8da73dd65 tensorrt_llm_batch_manager_static.lib
+0e09af04945b8cb0eaf65780a7fa7907a2baee1a commit
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:356deeea43040cf5529f89bd880028b8cf0a0600967df382b4107e796d9a301c
-size 1630654
+oid sha256:1f7087b56c34700e048ee9d40086b34d65952e66507ea36986ab11260e0a3300
+size 1759444
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:115bf7be63c22bb7fd69cafa600ebe9785b104e78e1a7e5b475bcbe1d2069037
-size 1655182
+oid sha256:3288603c000a6eecb7a28319c69536ac0b37b2337474330b61442b9940e1d988
+size 1787862
@@ -1,3 +1,3 @@
-aae0acac4fab096666be84b6e630bd71 libtensorrt_llm_executor_static.a
-a174142f8f74f1c1a439cc3d040b0b5f libtensorrt_llm_executor_static.pre_cxx11.a
-052edd4c2bca0a186eed2169a9681d317f67a712 commit
+cfe12cab670a58d56b0a2d881c218015 libtensorrt_llm_executor_static.a
+b177ad21ac5636091ee267a5a550aa77 libtensorrt_llm_executor_static.pre_cxx11.a
+0e09af04945b8cb0eaf65780a7fa7907a2baee1a commit
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5beefedcd2309d37550bac7e9d9ec0cf7f7c18d2fbf9a2d9bf9c7625954b6c8
-size 1694400
+oid sha256:f565a5225dec3f1f88df931d8f8a0718f2ab24f705a639ae4ebf358b37b4555d
+size 1824992
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89acdd3e6b904f9dc86bcf8e73fe03c43b82ceab5d325357ea1f3ed1ef797aaf
-size 1615086
+oid sha256:21aaf66b1435d4fec2f41ce0029522da71509cb7c6a856bbf9411c88c105cd5c
+size 1735024
@@ -1,3 +1,3 @@
-5bebbd31919ac2b34579c8653295dfd2 libtensorrt_llm_executor_static.a
-2332b6ea1e0b8683844168949f9dfb9c libtensorrt_llm_executor_static.pre_cxx11.a
-052edd4c2bca0a186eed2169a9681d317f67a712 commit
+4dcc8b42ff4afe11178bc2f145394b41 libtensorrt_llm_executor_static.a
+ead139d0835f7e86d2bf7ecd41ad0999 libtensorrt_llm_executor_static.pre_cxx11.a
+0e09af04945b8cb0eaf65780a7fa7907a2baee1a commit
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fceeee8a39af844a5cfefd32d3b2b024b659f5d7dceacc0f5dd0b69b5d37b7c
-size 17485396
+oid sha256:9763058bec6c637ec101384b9e89681ff315ddd514fb1e37fde8ef5c51de540a
+size 19341056
@@ -1,2 +1,2 @@
-ac61b12b5aa440f5f8f0e05511a12d17 tensorrt_llm_executor_static.lib
-052edd4c2bca0a186eed2169a9681d317f67a712 commit
+94894f3c80436b5dfbb6864dbe686baa tensorrt_llm_executor_static.lib
+0e09af04945b8cb0eaf65780a7fa7907a2baee1a commit
(The remaining file diffs were suppressed because they are too large, and some files were not shown because too many files changed in this commit.)