From 12763779c4bb74ce32b8c53916a0363e514fcbaf Mon Sep 17 00:00:00 2001 From: Robin Kobus <19427718+Funatiq@users.noreply.github.com> Date: Wed, 28 May 2025 16:32:59 +0200 Subject: [PATCH] chore: Clean up cpp runtime (#4449) Signed-off-by: Robin Kobus <19427718+Funatiq@users.noreply.github.com> --- .../batch_manager/transformerBuffers.h | 2 - .../tensorrt_llm/runtime/decoderState.h | 6 +++ .../createNewDecoderRequests.cpp | 7 ++- .../batch_manager/transformerBuffers.cpp | 12 ----- .../trtGptModelInflightBatching.cpp | 7 ++- .../batch_manager/updateDecoderBuffers.cpp | 3 +- cpp/tensorrt_llm/runtime/decoderState.cpp | 50 ++++++++----------- .../runtime/gptDecoderBatched.cpp | 3 +- cpp/tensorrt_llm/runtime/runtimeKernels.h | 2 +- cpp/tests/runtime/gptDecoderBatchedTest.cpp | 22 ++++---- 10 files changed, 47 insertions(+), 67 deletions(-) diff --git a/cpp/include/tensorrt_llm/batch_manager/transformerBuffers.h b/cpp/include/tensorrt_llm/batch_manager/transformerBuffers.h index 6ce278110c..17f23c8c12 100644 --- a/cpp/include/tensorrt_llm/batch_manager/transformerBuffers.h +++ b/cpp/include/tensorrt_llm/batch_manager/transformerBuffers.h @@ -119,8 +119,6 @@ public: void getBuffers(TensorMap& inputBuffers, TensorMap& outputBuffers, runtime::ModelConfig const& modelConfig) const; - void reshapePositionIds(std::vector const& positionIdsHost, bool isChatGlm); - void copyPositionIds(runtime::TllmRuntime const& runtime, std::vector const& positionIdsHost, bool isChatGlm, TensorPtr const& decoderPositionIds); diff --git a/cpp/include/tensorrt_llm/runtime/decoderState.h b/cpp/include/tensorrt_llm/runtime/decoderState.h index ca07aba745..934d100329 100644 --- a/cpp/include/tensorrt_llm/runtime/decoderState.h +++ b/cpp/include/tensorrt_llm/runtime/decoderState.h @@ -118,6 +118,10 @@ public: //! @returns [batchSize, maxBeamWidth], sequence lengths, on gpu [[nodiscard]] TensorPtr getSequenceLengths() const; + //! @param batchIdx index of the batch + //! @returns [maxBeamWidth], sequence lengths for request `batchIdx`, on gpu + [[nodiscard]] TensorPtr getSequenceLengths(SizeType32 batchIdx) const; + //! @brief Get maxTokensPerStep tokens generated in the last forward pass //! @returns [maxTokensPerStep, batchSize, maxBeamWidth], tokens generated in last forward pass, on gpu [[nodiscard]] TensorPtr getAllNewTokens() const; @@ -140,6 +144,8 @@ public: //! 
@returns [maxTokensPerStep, batchSize, beamWidth], finished states of type FinishedState, on gpu [[nodiscard]] TensorPtr getFinishedSteps() const; + [[nodiscard]] SizeType32 getMaxBatchSize() const; + [[nodiscard]] SizeType32 getMaxBeamWidth() const; [[nodiscard]] SizeType32 getMaxSequenceLength() const; diff --git a/cpp/tensorrt_llm/batch_manager/createNewDecoderRequests.cpp b/cpp/tensorrt_llm/batch_manager/createNewDecoderRequests.cpp index b736c45d22..c4a5e8febe 100644 --- a/cpp/tensorrt_llm/batch_manager/createNewDecoderRequests.cpp +++ b/cpp/tensorrt_llm/batch_manager/createNewDecoderRequests.cpp @@ -169,13 +169,12 @@ void CreateNewDecoderRequests::newRequest(SizeType32 batchSlot, runtime::decoder BufferManager manager{std::make_shared(decoderStream.get())}; - auto const& jointOutputIdsShape = decoderState.getJointDecodingOutput().ids->getShape(); - auto const batchSize = jointOutputIdsShape.d[0]; + auto const batchSize = decoderState.getMaxBatchSize(); TLLM_CHECK(0 <= batchSize && batchSlot < batchSize); - auto const maxBeamWidth = jointOutputIdsShape.d[1]; + auto const maxBeamWidth = decoderState.getMaxBeamWidth(); auto const beamWidth = samplingConfig.beamWidth; TLLM_CHECK_WITH_INFO(beamWidth <= maxBeamWidth, - tc::fmtstr("Beam width (%d) must be smaller than maxBeamWidth (" FMT_DIM ") passed to decoder setup function.", + tc::fmtstr("Beam width (%d) must be smaller than maxBeamWidth (%d) passed to decoder setup function.", beamWidth, maxBeamWidth)); auto const& requestIds = request.ids; auto const inputLength = request.inputLen; diff --git a/cpp/tensorrt_llm/batch_manager/transformerBuffers.cpp b/cpp/tensorrt_llm/batch_manager/transformerBuffers.cpp index 42b1a89712..5810344a01 100644 --- a/cpp/tensorrt_llm/batch_manager/transformerBuffers.cpp +++ b/cpp/tensorrt_llm/batch_manager/transformerBuffers.cpp @@ -313,18 +313,6 @@ void TransformerBuffers::getBuffers( TLLM_LOG_TRACE("%s stop", __PRETTY_FUNCTION__); } -void TransformerBuffers::reshapePositionIds(std::vector const& positionIdsHost, bool isChatGlm) -{ - if (isChatGlm) - { - positionIds->reshape(ITensor::makeShape({2, static_cast(positionIdsHost.size()) / 2})); - } - else - { - positionIds->reshape(ITensor::makeShape({static_cast(positionIdsHost.size())})); - } -} - void TransformerBuffers::copyPositionIds(runtime::TllmRuntime const& runtime, std::vector const& positionIdsHost, bool isChatGlm, TensorPtr const& decoderPositionIds) { diff --git a/cpp/tensorrt_llm/batch_manager/trtGptModelInflightBatching.cpp b/cpp/tensorrt_llm/batch_manager/trtGptModelInflightBatching.cpp index 0a1b6f03ec..e9dd6325e5 100644 --- a/cpp/tensorrt_llm/batch_manager/trtGptModelInflightBatching.cpp +++ b/cpp/tensorrt_llm/batch_manager/trtGptModelInflightBatching.cpp @@ -1906,13 +1906,12 @@ void TrtGptModelInflightBatching::getDecoderSlotHostOutputs( // Make sure that postprocessing is done before copying outputIds mCopyBufferManager.getStream().wait(event.get()); - TensorPtr sequenceLengthView - = ITensor::slice(mDecoder->getDecoderState().getJointDecodingOutput().lengths, seqSlot, 1); + auto sequenceLengths = mDecoder->getDecoderState().getSequenceLengths(seqSlot); auto outputIds = mDecoder->getDecoderState().getGatheredIds(seqSlot); auto cumLogProbs = mDecoder->getDecoderState().getCumLogProbs(seqSlot); auto logProbs = mDecoder->getDecoderState().getLogProbs(seqSlot); - mCopyBufferManager.copy(*sequenceLengthView, *mSlotDecoderBuffers[seqSlot]->sequenceLengths); + mCopyBufferManager.copy(*sequenceLengths, 
*mSlotDecoderBuffers[seqSlot]->sequenceLengths); mCopyBufferManager.copy(*outputIds, *mSlotDecoderBuffers[seqSlot]->outputIds); if (returnLogProbs) { @@ -1927,7 +1926,7 @@ void TrtGptModelInflightBatching::getDecoderSlotHostOutputs( auto const peerSend = 0; mDecSlotAsyncSndHdls.emplace_back(std::make_unique( - outputIds, sequenceLengthView, cumLogProbs, logProbs, returnLogProbs, *mMpiCommPipelinePara, peerSend)); + outputIds, sequenceLengths, cumLogProbs, logProbs, returnLogProbs, *mMpiCommPipelinePara, peerSend)); } } else diff --git a/cpp/tensorrt_llm/batch_manager/updateDecoderBuffers.cpp b/cpp/tensorrt_llm/batch_manager/updateDecoderBuffers.cpp index 83ab19ed33..0b94eb4f7e 100644 --- a/cpp/tensorrt_llm/batch_manager/updateDecoderBuffers.cpp +++ b/cpp/tensorrt_llm/batch_manager/updateDecoderBuffers.cpp @@ -40,8 +40,7 @@ runtime::CudaEvent UpdateDecoderBuffers::operator()(runtime::ModelConfig const& copyBufferManager.getStream().wait(decoderFinishEvent); copyBufferManager.copy(*decoder.getDecoderState().getAllNewTokens(), *decoderOutputBuffers.newOutputTokensHost); - copyBufferManager.copy( - *decoder.getDecoderState().getJointDecodingOutput().lengths, *decoderOutputBuffers.sequenceLengthsHost); + copyBufferManager.copy(*decoder.getDecoderState().getSequenceLengths(), *decoderOutputBuffers.sequenceLengthsHost); auto const finishedSumDevice = decoder.getDecoderState().getFinishedSum(); copyBufferManager.copy(*finishedSumDevice, *decoderOutputBuffers.finishedSumHost); diff --git a/cpp/tensorrt_llm/runtime/decoderState.cpp b/cpp/tensorrt_llm/runtime/decoderState.cpp index db9ae904f7..c86b770575 100644 --- a/cpp/tensorrt_llm/runtime/decoderState.cpp +++ b/cpp/tensorrt_llm/runtime/decoderState.cpp @@ -417,75 +417,57 @@ void DecoderState::disableLookahead(RequestVector const& genRequests) TensorPtr DecoderState::getFinishedSum() const { - return ITensor::slice(mJointDecodingOutput->finishedSum, 0, mMaxBatchSize); + return mJointDecodingOutput->finishedSum; } TensorPtr DecoderState::getFinishReasons() const { - return ITensor::slice(mJointDecodingOutput->finishReasons, 0, mMaxBatchSize); + return mJointDecodingOutput->finishReasons; } TensorPtr DecoderState::getIds() const { - TLLM_LOG_TRACE("%s start", __PRETTY_FUNCTION__); - auto tensor = ITensor::slice(mJointDecodingOutput->ids, 0, mMaxBatchSize); - TLLM_LOG_TRACE("%s stop", __PRETTY_FUNCTION__); - return tensor; + return mJointDecodingOutput->ids; } TensorPtr DecoderState::getIds(SizeType32 batchIdx) const { - TLLM_LOG_TRACE("%s start", __PRETTY_FUNCTION__); - auto tensor = ITensor::slice(mJointDecodingOutput->ids, batchIdx, 1); - tensor->squeeze(0); - TLLM_LOG_TRACE("%s stop", __PRETTY_FUNCTION__); - return tensor; + return ITensor::at(mJointDecodingOutput->ids, {batchIdx}); } TensorPtr DecoderState::getGatheredIds() const { - TLLM_LOG_TRACE("%s start", __PRETTY_FUNCTION__); - auto tensor = ITensor::slice(mJointDecodingOutput->gatheredIds, 0, mMaxBatchSize); - TLLM_LOG_TRACE("%s stop", __PRETTY_FUNCTION__); - return tensor; + return mJointDecodingOutput->gatheredIds; } TensorPtr DecoderState::getGatheredIds(SizeType32 batchIdx) const { - TLLM_LOG_TRACE("%s start", __PRETTY_FUNCTION__); - auto tensor = ITensor::slice(mJointDecodingOutput->gatheredIds, batchIdx, 1); - tensor->squeeze(0); - TLLM_LOG_TRACE("%s stop", __PRETTY_FUNCTION__); - return tensor; + return ITensor::at(mJointDecodingOutput->gatheredIds, {batchIdx}); } TensorPtr DecoderState::getParentIds() const { - return ITensor::slice(mJointDecodingOutput->parentIds, 0, 
mMaxBatchSize); + return mJointDecodingOutput->parentIds; } TensorPtr DecoderState::getCumLogProbs() const { - return ITensor::slice(mJointDecodingOutput->cumLogProbs, 0, mMaxBatchSize); + return mJointDecodingOutput->cumLogProbs; } TensorPtr DecoderState::getCumLogProbs(SizeType32 batchIdx) const { - auto tensor = ITensor::slice(mJointDecodingOutput->cumLogProbs, batchIdx, 1); - tensor->squeeze(0); - return tensor; + return ITensor::at(mJointDecodingOutput->cumLogProbs, {batchIdx}); } TensorPtr DecoderState::getLogProbs() const { - return ITensor::slice(mJointDecodingOutput->logProbs, 0, mMaxBatchSize); + return mJointDecodingOutput->logProbs; } TensorPtr DecoderState::getLogProbs(SizeType32 batchIdx) const { - auto tensor = ITensor::slice(mJointDecodingOutput->logProbs, batchIdx, 1); - tensor->squeeze(0); - return tensor; + return ITensor::at(mJointDecodingOutput->logProbs, {batchIdx}); } TensorPtr DecoderState::getSequenceLengths() const @@ -493,6 +475,11 @@ TensorPtr DecoderState::getSequenceLengths() const return mJointDecodingOutput->lengths; } +TensorPtr DecoderState::getSequenceLengths(SizeType32 batchIdx) const +{ + return ITensor::at(mJointDecodingOutput->lengths, {batchIdx}); +} + TensorPtr DecoderState::getAllNewTokens() const { return mJointDecodingOutput->newTokensSteps; @@ -528,6 +515,11 @@ TensorPtr DecoderState::getFinishedSteps() const return mFinishedSteps; } +SizeType32 DecoderState::getMaxBatchSize() const +{ + return mMaxBatchSize; +} + SizeType32 DecoderState::getMaxBeamWidth() const { return mMaxBeamWidth; diff --git a/cpp/tensorrt_llm/runtime/gptDecoderBatched.cpp b/cpp/tensorrt_llm/runtime/gptDecoderBatched.cpp index cfb896ac0b..aabf246ab5 100644 --- a/cpp/tensorrt_llm/runtime/gptDecoderBatched.cpp +++ b/cpp/tensorrt_llm/runtime/gptDecoderBatched.cpp @@ -208,8 +208,7 @@ void GptDecoderBatched::prepareForward( { TLLM_LOG_TRACE("%s start", __PRETTY_FUNCTION__); - auto const& jointOutputIdsShape = mDecoderState->getJointDecodingOutput().ids->getShape(); - auto const maxBeamWidth = jointOutputIdsShape.d[1]; + auto const maxBeamWidth = mDecoderState->getMaxBeamWidth(); auto const speculativeDecodingMode = mDecoderState->getSpeculativeDecodingMode(); auto& dInput = mDecoderState->getJointDecodingInput(); diff --git a/cpp/tensorrt_llm/runtime/runtimeKernels.h b/cpp/tensorrt_llm/runtime/runtimeKernels.h index 27e8df379b..f1c34ace66 100644 --- a/cpp/tensorrt_llm/runtime/runtimeKernels.h +++ b/cpp/tensorrt_llm/runtime/runtimeKernels.h @@ -48,7 +48,7 @@ void scatterTensor(ITensor& output, ITensor const& input, SizeType32 beamWidth, void tileTensor(ITensor& output, ITensor const& input, SizeType32 beamWidth, CudaStream const& stream); void mergeLogitsFragments(BufferManager const& bufferManager, ITensor& output, - std::vector const& inputVector, ITensor& cachePointerDevice, ITensor& cachePointerHost, + std::vector const& fragmentsVector, ITensor& cachePointerDevice, ITensor& cachePointerHost, SizeType32 firstBatchSlotIdx, SizeType32 microBatchSize, SizeType32 beamWidth, CudaStream const& stream, int stepOffset); diff --git a/cpp/tests/runtime/gptDecoderBatchedTest.cpp b/cpp/tests/runtime/gptDecoderBatchedTest.cpp index 1aa919e95e..fd6093e486 100644 --- a/cpp/tests/runtime/gptDecoderBatchedTest.cpp +++ b/cpp/tests/runtime/gptDecoderBatchedTest.cpp @@ -325,7 +325,7 @@ void testDecoder(nvinfer1::DataType const dtype, std::vector& sa auto decoderInputs = createDecoderInputs(batchSize, maxBeamWidth, maxSeqLength, vocabSizePadded, dataType, samplingConfigs, generatedTokensPerSteps, 
computeLogProbs, manager); auto outputs = createDecoderOutputs(batchSize, maxBeamWidth, maxSeqLength, tiledInputLengths, - *decoder.getDecoderState().getJointDecodingOutput().lengths, manager); + *decoder.getDecoderState().getSequenceLengths(), manager); std::vector decoderRequests; newRequests( @@ -333,7 +333,7 @@ void testDecoder(nvinfer1::DataType const dtype, std::vector& sa cudaDeviceSynchronize(); auto expectedLengths = tiledInputLengths; - checkSequenceLengths(*decoder.getDecoderState().getJointDecodingOutput().lengths, expectedLengths, manager); + checkSequenceLengths(*decoder.getDecoderState().getSequenceLengths(), expectedLengths, manager); auto const& finished = getFinished(*decoder.getDecoderState().getFinishedSum(), samplingConfigs, manager); EXPECT_EQ(finished.size(), batchSize); @@ -352,7 +352,7 @@ void testDecoder(nvinfer1::DataType const dtype, std::vector& sa decoderInputs.logits, batchSize, inputBuffers.forwardBatchSlots, decoderInputs.srcCacheIndirection); decoder.forward(outputs, *inputs); - checkSequenceLengths(*decoder.getDecoderState().getJointDecodingOutput().lengths, expectedLengths, manager); + checkSequenceLengths(*decoder.getDecoderState().getSequenceLengths(), expectedLengths, manager); EXPECT_THAT( getFinished(*decoder.getDecoderState().getFinishedSum(), samplingConfigs, manager), ::testing::Each(false)); @@ -363,7 +363,7 @@ void testDecoder(nvinfer1::DataType const dtype, std::vector& sa advanceSequenceLengths(expectedLengths, acceptedTokensPerStep, samplingConfigs, getFinished(*decoder.getDecoderState().getFinishedSum(), samplingConfigs, manager), batchSize, maxBeamWidth); decoder.forward(outputs, *inputs); - checkSequenceLengths(*decoder.getDecoderState().getJointDecodingOutput().lengths, expectedLengths, manager); + checkSequenceLengths(*decoder.getDecoderState().getSequenceLengths(), expectedLengths, manager); EXPECT_THAT( getFinished(*decoder.getDecoderState().getFinishedSum(), samplingConfigs, manager), ::testing::Each(true)); @@ -371,7 +371,7 @@ void testDecoder(nvinfer1::DataType const dtype, std::vector& sa maxSeqLength, inputTokenId, expectedTokenId, endId); EXPECT_NO_THROW(decoder.forward(outputs, *inputs)); - checkSequenceLengths(*decoder.getDecoderState().getJointDecodingOutput().lengths, expectedLengths, manager); + checkSequenceLengths(*decoder.getDecoderState().getSequenceLengths(), expectedLengths, manager); TensorPtr batchSlotsView = ITensor::slice(inputBuffers.setupBatchSlots, 0, 1); std::vector singleConfig = {samplingConfigs[0]}; @@ -454,7 +454,7 @@ void testDecoderWavefront(nvinfer1::DataType const dtype, std::vector const expectedSteps(batchSize, 0); auto expectedLengths = tiledInputLengths; @@ -480,7 +480,7 @@ void testDecoderWavefront(nvinfer1::DataType const dtype, std::vector(*inputBuffers.setupBatchSlots); std::iota(batchSlotsRange.begin(), batchSlotsRange.end(), 0); @@ -609,7 +609,7 @@ void testDecoderDraft(nvinfer1::DataType const dtype, std::vector
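Note (illustrative, not part of the patch): the sketch below shows how call sites are expected to use the DecoderState accessors introduced in this change — getMaxBatchSize(), getMaxBeamWidth(), and the per-request getSequenceLengths(batchIdx) overload — instead of reaching into getJointDecodingOutput() and slicing/squeezing tensors by hand. The helper name copySlotOutputs and its parameters are hypothetical; the includes and namespaces are assumed to match the TensorRT-LLM runtime as it appears in this diff.

// Hypothetical helper, sketched against the accessors added in this patch.
#include "tensorrt_llm/common/assert.h"
#include "tensorrt_llm/runtime/bufferManager.h"
#include "tensorrt_llm/runtime/decoderState.h"

namespace tr = tensorrt_llm::runtime;

void copySlotOutputs(tr::decoder::DecoderState const& decoderState, tr::SizeType32 seqSlot,
    tr::BufferManager const& copyBufferManager, tr::ITensor& sequenceLengthsOut, tr::ITensor& outputIdsOut)
{
    // Bounds come from dedicated getters rather than from the shape of the joint output ids tensor.
    TLLM_CHECK(0 <= seqSlot && seqSlot < decoderState.getMaxBatchSize());

    // Per-request views on GPU: [maxBeamWidth] sequence lengths and the gathered ids for this slot.
    auto const sequenceLengths = decoderState.getSequenceLengths(seqSlot);
    auto const outputIds = decoderState.getGatheredIds(seqSlot);

    copyBufferManager.copy(*sequenceLengths, sequenceLengthsOut);
    copyBufferManager.copy(*outputIds, outputIdsOut);
}

Used this way, the layout of jointDecodingOutput stays an implementation detail of DecoderState, so callers such as trtGptModelInflightBatching.cpp, updateDecoderBuffers.cpp, and the tests no longer depend on its tensor shapes directly.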