mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
* Update TensorRT-LLM --------- Co-authored-by: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
648 lines
21 KiB
C++
648 lines
21 KiB
C++
/*
|
|
* Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "tests/layers/samplingLayerTest.h"
|
|
|
|
namespace
|
|
{
|
|
|
|
using namespace tensorrt_llm::tests::layers::sampling;
|
|
|
|
template <typename T>
|
|
class TopKSamplingLayerTest : public SamplingLayerTest<T>
|
|
{
|
|
void SetUp() override
|
|
{
|
|
this->mStream = std::make_shared<tensorrt_llm::runtime::CudaStream>();
|
|
this->mBufferManager = std::make_shared<tensorrt_llm::runtime::BufferManager>(this->mStream);
|
|
|
|
this->mAllocator = std::make_shared<tensorrt_llm::common::CudaAllocator>(*this->mBufferManager);
|
|
|
|
this->mSamplingLayer = std::make_shared<tensorrt_llm::layers::TopKSamplingLayer<T>>(
|
|
this->mVocabSize, this->mVocabSizePadded, this->mStream->get(), this->mAllocator, false);
|
|
}
|
|
};
|
|
|
|
// Registers the fixture as a GoogleTest typed suite: every TYPED_TEST below is instantiated once
// per type in FloatAndHalfTypes (declared outside this file; presumably float and half — confirm).
TYPED_TEST_SUITE(TopKSamplingLayerTest, FloatAndHalfTypes);
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopK)
{
    // A single topK of 2 and a single topP of 0, each passed as a one-element list.
    uint32_t const sharedTopK = 2;
    float const sharedTopP = 0.0f;

    SamplingParams samplingParams;
    samplingParams.topPs = {sharedTopP};
    samplingParams.topKs = {sharedTopK};

    // Expected output ids: one row per step, six batch slots per row.
    std::vector<std::set<int32_t>> expectedIds{
        {4, 5}, {4, 5}, {4, 5}, {4, 5}, {4, 5}, {4, 5}, // step 0
        {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, // step 1
        {2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, // step 2
        {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopK1TopP0)
{
    // topK of 1 combined with topP of 0: exactly one id is expected per slot and step.
    uint32_t const sharedTopK = 1;
    float const sharedTopP = 0.0f;

    SamplingParams samplingParams;
    samplingParams.topPs = {sharedTopP};
    samplingParams.topKs = {sharedTopK};

    // Expected output ids: one row per step, six batch slots per row.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {0}, {0}, {0}, {0}, {0}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, BatchTopK)
{
    // One topK per batch slot; topP is a single 1.0 entry.
    std::vector<uint32_t> const perSlotTopK{2, 1, 1, 2, 1, 1};

    SamplingParams samplingParams;
    samplingParams.topPs = {1.0f};
    samplingParams.topKs = perSlotTopK;

    // Expected output ids: slots with topK 2 list two ids, slots with topK 1 list one.
    std::vector<std::set<int32_t>> expectedIds{
        {4, 5}, {4}, {4}, {4, 5}, {4}, {4}, // step 0
        {0, 1}, {0}, {0}, {0, 1}, {0}, {0}, // step 1
        {2, 3}, {2}, {2}, {2, 3}, {2}, {2}, // step 2
        {0, 1}, {0}, {0}, {0, 1}, {0}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, SkipDecode)
{
    // Only topP is supplied (no topKs). The original note says topK decoding is skipped here;
    // the table below expects id 0 for every slot at every step.
    float const onlyTopP = 0.3f;

    SamplingParams samplingParams;
    samplingParams.topPs = {onlyTopP};

    // Expected output ids: one row per step, six batch slots per row.
    std::vector<std::set<int32_t>> expectedIds{
        {0}, {0}, {0}, {0}, {0}, {0}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {0}, {0}, {0}, {0}, {0}, {0}, // step 2
        {0}, {0}, {0}, {0}, {0}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKTopP)
{
    // topK of 2 together with topP of 0.3: a single id is expected per slot and step.
    uint32_t const sharedTopK = 2;
    float const sharedTopP = 0.3f;

    SamplingParams samplingParams;
    samplingParams.topPs = {sharedTopP};
    samplingParams.topKs = {sharedTopK};

    // Expected output ids: one row per step, six batch slots per row.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {0}, {0}, {0}, {0}, {0}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, BatchTopKTopP)
{
    // Per-slot topK values with one shared topP of 0.3.
    std::vector<uint32_t> const perSlotTopK{2, 2, 1, 2, 2, 1};
    float const sharedTopP = 0.3f;

    SamplingParams samplingParams;
    samplingParams.topPs = {sharedTopP};
    samplingParams.topKs = perSlotTopK;

    // Expected output ids: a single id per slot regardless of that slot's topK.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {0}, {0}, {0}, {0}, {0}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKBatchTopP)
{
    // One shared topK of 2 with a topP per batch slot.
    uint32_t const sharedTopK = 2;
    std::vector<float> const perSlotTopP{0.5f, 0.3f, 0.5f, 0.5f, 0.3f, 0.5f};

    SamplingParams samplingParams;
    samplingParams.topPs = perSlotTopP;
    samplingParams.topKs = {sharedTopK};

    // Expected output ids: slots with topP 0.5 list two ids, slots with topP 0.3 list one.
    std::vector<std::set<int32_t>> expectedIds{
        {4, 5}, {4}, {4, 5}, {4, 5}, {4}, {4, 5}, // step 0
        {0, 1}, {0}, {0, 1}, {0, 1}, {0}, {0, 1}, // step 1
        {2, 3}, {2}, {2, 3}, {2, 3}, {2}, {2, 3}, // step 2
        {0, 1}, {0}, {0, 1}, {0, 1}, {0}, {0, 1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, BatchTopKBatchTopP)
{
    // Both topK and topP are given per batch slot.
    std::vector<uint32_t> const perSlotTopK{2, 2, 1, 2, 2, 1};
    std::vector<float> const perSlotTopP{0.0f, 0.3f, 0.5f, 0.0f, 0.3f, 0.5f};

    SamplingParams samplingParams;
    samplingParams.topPs = perSlotTopP;
    samplingParams.topKs = perSlotTopK;

    // Expected output ids: only the slots with topK 2 and topP 0 list two ids.
    std::vector<std::set<int32_t>> expectedIds{
        {4, 5}, {4}, {4}, {4, 5}, {4}, {4}, // step 0
        {0, 1}, {0}, {0}, {0, 1}, {0}, {0}, // step 1
        {2, 3}, {2}, {2}, {2, 3}, {2}, {2}, // step 2
        {0, 1}, {0}, {0}, {0, 1}, {0}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, InvalidArgsZeroTopK)
{
    // topK of 0 with no topP supplied; the table still expects one id per slot and step.
    uint32_t const zeroTopK = 0;

    SamplingParams samplingParams;
    samplingParams.topKs = {zeroTopK};

    // Expected output ids: one row per step, six batch slots per row.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {0}, {0}, {0}, {0}, {0}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, InvalidArgsZeroTopP)
{
    // topP of 0 with no topK supplied; the table still expects one id per slot and step.
    float const zeroTopP = 0.0f;

    SamplingParams samplingParams;
    samplingParams.topPs = {zeroTopP};

    // Expected output ids: one row per step, six batch slots per row.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {0}, {0}, {0}, {0}, {0}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, InvalidArgsZeroTopKTopP)
{
    // Both topK and topP are 0, each passed as a one-element list.
    uint32_t const zeroTopK = 0;
    float const zeroTopP = 0.0f;

    SamplingParams samplingParams;
    samplingParams.topKs = {zeroTopK};
    samplingParams.topPs = {zeroTopP};

    // Expected output ids: one row per step, six batch slots per row.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {0}, {0}, {0}, {0}, {0}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, InvalidArgsZeroBatchTopKTopP)
{
    // Per-slot topK list that is all zeros, plus a single topP of 0.
    std::vector<uint32_t> const perSlotTopK{0, 0, 0, 0, 0, 0};
    float const zeroTopP = 0.0f;

    SamplingParams samplingParams;
    samplingParams.topKs = perSlotTopK;
    samplingParams.topPs = {zeroTopP};

    // Expected output ids: one row per step, six batch slots per row.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {0}, {0}, {0}, {0}, {0}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, InvalidArgsZeroTopKBatchTopP)
{
    // A single topK of 0, plus a per-slot topP list that is all zeros.
    uint32_t const zeroTopK = 0;
    std::vector<float> const perSlotTopP{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};

    SamplingParams samplingParams;
    samplingParams.topKs = {zeroTopK};
    samplingParams.topPs = perSlotTopP;

    // Expected output ids: one row per step, six batch slots per row.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {0}, {0}, {0}, {0}, {0}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, InvalidArgsBatchTopKContainZero)
{
    // Per-slot topK list in which slots 2 and 3 are 0; no topP is supplied.
    std::vector<uint32_t> const perSlotTopK{2, 1, 0, 0, 2, 1};

    SamplingParams samplingParams;
    samplingParams.topKs = perSlotTopK;

    // Expected output ids: the zero-topK slots expect the same single id as the topK-1 slots.
    std::vector<std::set<int32_t>> expectedIds{
        {4, 5}, {4}, {4}, {4}, {4, 5}, {4}, // step 0
        {0, 1}, {0}, {0}, {0}, {0, 1}, {0}, // step 1
        {2, 3}, {2}, {2}, {2}, {2, 3}, {2}, // step 2
        {0, 1}, {0}, {0}, {0}, {0, 1}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, InvalidArgsBatchTopKTopPContainZero)
{
    // Per-slot topK list containing zeros (slots 3 and 5), with a single topP of 0.
    std::vector<uint32_t> const perSlotTopK{2, 2, 1, 0, 2, 0};
    float const zeroTopP = 0.0f;

    SamplingParams samplingParams;
    samplingParams.topKs = perSlotTopK;
    samplingParams.topPs = {zeroTopP};

    // Expected output ids: the zero-topK slots expect the same single id as the topK-1 slot.
    std::vector<std::set<int32_t>> expectedIds{
        {4, 5}, {4, 5}, {4}, {4}, {4, 5}, {4}, // step 0
        {0, 1}, {0, 1}, {0}, {0}, {0, 1}, {0}, // step 1
        {2, 3}, {2, 3}, {2}, {2}, {2, 3}, {2}, // step 2
        {0, 1}, {0, 1}, {0}, {0}, {0, 1}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, InvalidArgsBatchTopKBatchTopPContainZero)
{
    // Per-slot topK and topP lists, both containing zeros.
    std::vector<uint32_t> const perSlotTopK{0, 2, 1, 2, 2, 0};
    std::vector<float> const perSlotTopP{0.0f, 0.3f, 0.9f, 0.0f, 0.3f, 0.5f};

    SamplingParams samplingParams;
    samplingParams.topKs = perSlotTopK;
    samplingParams.topPs = perSlotTopP;

    // Expected output ids: the last slot (topK 0, topP 0.5) expects id 0 at every step.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4, 5}, {4}, {0}, // step 0
        {0}, {0}, {0}, {0, 1}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2, 3}, {2}, {0}, // step 2
        {0}, {0}, {0}, {0, 1}, {0}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKTemperature)
{
    // topK of 2 with one shared temperature of 0.05; topP is a single 1.0 entry.
    uint32_t const sharedTopK = 2;
    float const sharedTemperature = 0.05f;

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.temperatures = {sharedTemperature};

    // Expected output ids: a single id per slot and step even though topK is 2.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {0}, {0}, {0}, {0}, {0}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKTemperatureBatch)
{
    // topK of 2 with a temperature per batch slot; topP is a single 1.0 entry.
    uint32_t const sharedTopK = 2;
    std::vector<float> const perSlotTemperature{0.05f, 1e3f, 1.0f, 0.5f, 0.05f, 1.0f};

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.temperatures = perSlotTemperature;

    // Expected output ids: the 0.05 slots list one id, the 1e3 slot lists four, the rest two.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4, 5, 6, 7}, {4, 5}, {4, 5}, {4}, {4, 5}, // step 0
        {0}, {0, 1, 2, 3}, {0, 1}, {0, 1}, {0}, {0, 1}, // step 1
        {2}, {2, 3, 4, 5}, {2, 3}, {2, 3}, {2}, {2, 3}, // step 2
        {0}, {0, 1, 2, 3}, {0, 1}, {0, 1}, {0}, {0, 1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKRepetitionPenalty)
{
    // topK of 1 with one shared repetition penalty of 1e9.
    uint32_t const sharedTopK = 1;
    float const sharedRepetition = 1e9f;

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.repetitionPenalties = {sharedRepetition};

    // Expected output ids: step 3 expects id 1 for every slot.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {1}, {1}, {1}, {1}, {1}, {1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKRepetitionPenaltiesBatch)
{
    // topK of 1 with a repetition penalty per slot: 1e9 for slots 0, 1, 5 and 1.0 elsewhere.
    uint32_t const sharedTopK = 1;
    std::vector<float> const perSlotRepetition{1e9f, 1e9f, 1.0f, 1.0f, 1.0f, 1e9f};

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.repetitionPenalties = perSlotRepetition;

    // Expected output ids: at step 3 the 1e9 slots expect id 1, the 1.0 slots expect id 0.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {1}, {1}, {0}, {0}, {0}, {1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKPresencePenalty)
{
    // topK of 1 with one shared presence penalty of 1e9.
    uint32_t const sharedTopK = 1;
    float const sharedPresence = 1e9f;

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.presencePenalties = {sharedPresence};

    // Expected output ids: step 3 expects id 1 for every slot.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {1}, {1}, {1}, {1}, {1}, {1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKPresencePenaltiesBatch)
{
    // topK of 1 with a presence penalty per slot: 1e9 for slots 0, 1, 5 and 0.0 elsewhere.
    uint32_t const sharedTopK = 1;
    std::vector<float> const perSlotPresence{1e9f, 1e9f, 0.0f, 0.0f, 0.0f, 1e9f};

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.presencePenalties = perSlotPresence;

    // Expected output ids: at step 3 the 1e9 slots expect id 1, the 0.0 slots expect id 0.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {1}, {1}, {0}, {0}, {0}, {1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKFrequencyPenalty)
{
    // topK of 1 with one shared frequency penalty of 1e9.
    uint32_t const sharedTopK = 1;
    float const sharedFrequency = 1e9f;

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.frequencyPenalties = {sharedFrequency};

    // Expected output ids: step 3 expects id 1 for every slot.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {1}, {1}, {1}, {1}, {1}, {1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKFrequencyPenaltiesBatch)
{
    // topK of 1 with a frequency penalty per slot: 1e9 for slots 0, 1, 5 and 0.0 elsewhere.
    uint32_t const sharedTopK = 1;
    std::vector<float> const perSlotFrequency{1e9f, 1e9f, 0.0f, 0.0f, 0.0f, 1e9f};

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.frequencyPenalties = perSlotFrequency;

    // Expected output ids: at step 3 the 1e9 slots expect id 1, the 0.0 slots expect id 0.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {1}, {1}, {0}, {0}, {0}, {1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKRepetitionPresencePenalty)
{
    // topK of 1 with shared repetition and presence penalties, both 1e9.
    uint32_t const sharedTopK = 1;
    float const sharedRepetition = 1e9f;
    float const sharedPresence = 1e9f;

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.repetitionPenalties = {sharedRepetition};
    samplingParams.presencePenalties = {sharedPresence};

    // Expected output ids: step 3 expects id 1 for every slot.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {1}, {1}, {1}, {1}, {1}, {1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKRepetitionPresencePenaltiesBatch)
{
    // topK of 1 with per-slot repetition and presence penalties; slots 0, 1, 5 carry 1e9 for
    // both, the remaining slots carry the values 1.0 (repetition) and 0.0 (presence).
    uint32_t const sharedTopK = 1;
    std::vector<float> const perSlotRepetition{1e9f, 1e9f, 1.0f, 1.0f, 1.0f, 1e9f};
    std::vector<float> const perSlotPresence{1e9f, 1e9f, 0.0f, 0.0f, 0.0f, 1e9f};

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.repetitionPenalties = perSlotRepetition;
    samplingParams.presencePenalties = perSlotPresence;

    // Expected output ids: at step 3 the 1e9 slots expect id 1, the other slots expect id 0.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {1}, {1}, {0}, {0}, {0}, {1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKRepetitionFrequencyPenalty)
{
    // topK of 1 with shared repetition and frequency penalties, both 1e9.
    uint32_t const sharedTopK = 1;
    float const sharedRepetition = 1e9f;
    float const sharedFrequency = 1e9f;

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.repetitionPenalties = {sharedRepetition};
    samplingParams.frequencyPenalties = {sharedFrequency};

    // Expected output ids: step 3 expects id 1 for every slot.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {1}, {1}, {1}, {1}, {1}, {1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKRepetitionFrequencyPenaltiesBatch)
{
    // topK of 1 with per-slot repetition and frequency penalties; slots 0, 1, 5 carry 1e9 for
    // both, the remaining slots carry the values 1.0 (repetition) and 0.0 (frequency).
    uint32_t const sharedTopK = 1;
    std::vector<float> const perSlotRepetition{1e9f, 1e9f, 1.0f, 1.0f, 1.0f, 1e9f};
    std::vector<float> const perSlotFrequency{1e9f, 1e9f, 0.0f, 0.0f, 0.0f, 1e9f};

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.repetitionPenalties = perSlotRepetition;
    samplingParams.frequencyPenalties = perSlotFrequency;

    // Expected output ids: at step 3 the 1e9 slots expect id 1, the other slots expect id 0.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {1}, {1}, {0}, {0}, {0}, {1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKPresenceFrequencyPenalty)
{
    // topK of 1 with shared presence and frequency penalties, both 1e9.
    uint32_t const sharedTopK = 1;
    float const sharedPresence = 1e9f;
    float const sharedFrequency = 1e9f;

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.presencePenalties = {sharedPresence};
    samplingParams.frequencyPenalties = {sharedFrequency};

    // Expected output ids: step 3 expects id 1 for every slot.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {1}, {1}, {1}, {1}, {1}, {1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKPresenceFrequencyPenaltiesBatch)
{
    // topK of 1 with per-slot presence and frequency penalties; slots 0, 1, 5 carry 1e9 for
    // both, the remaining slots carry 0.0 for both.
    uint32_t const sharedTopK = 1;
    std::vector<float> const perSlotPresence{1e9f, 1e9f, 0.0f, 0.0f, 0.0f, 1e9f};
    std::vector<float> const perSlotFrequency{1e9f, 1e9f, 0.0f, 0.0f, 0.0f, 1e9f};

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.presencePenalties = perSlotPresence;
    samplingParams.frequencyPenalties = perSlotFrequency;

    // Expected output ids: at step 3 the 1e9 slots expect id 1, the other slots expect id 0.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {1}, {1}, {0}, {0}, {0}, {1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKFullPenalty)
{
    // topK of 1 with all three penalties (repetition, presence, frequency) shared and set to 1e9.
    uint32_t const sharedTopK = 1;
    float const sharedRepetition = 1e9f;
    float const sharedPresence = 1e9f;
    float const sharedFrequency = 1e9f;

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.repetitionPenalties = {sharedRepetition};
    samplingParams.presencePenalties = {sharedPresence};
    samplingParams.frequencyPenalties = {sharedFrequency};

    // Expected output ids: step 3 expects id 1 for every slot.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {1}, {1}, {1}, {1}, {1}, {1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKFullPenaltiesBatch)
{
    // topK of 1 with all three penalties given per slot; slots 0, 1, 5 carry 1e9 everywhere,
    // the remaining slots carry 1.0 (repetition) and 0.0 (presence, frequency).
    uint32_t const sharedTopK = 1;
    std::vector<float> const perSlotRepetition{1e9f, 1e9f, 1.0f, 1.0f, 1.0f, 1e9f};
    std::vector<float> const perSlotPresence{1e9f, 1e9f, 0.0f, 0.0f, 0.0f, 1e9f};
    std::vector<float> const perSlotFrequency{1e9f, 1e9f, 0.0f, 0.0f, 0.0f, 1e9f};

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.repetitionPenalties = perSlotRepetition;
    samplingParams.presencePenalties = perSlotPresence;
    samplingParams.frequencyPenalties = perSlotFrequency;

    // Expected output ids: at step 3 the 1e9 slots expect id 1, the other slots expect id 0.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {0}, {0}, {0}, {0}, {0}, {0}, // step 1
        {2}, {2}, {2}, {2}, {2}, {2}, // step 2
        {1}, {1}, {0}, {0}, {0}, {1}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKMinLengthBatch)
{
    // topK of 1 with a minimum length per slot, and token 0 passed to runTest as the end id.
    uint32_t const sharedTopK = 1;
    int32_t const endTokenId = 0;
    std::vector<int32_t> const perSlotMinLength{3, 1, 1, 3, 0, 3};

    SamplingParams samplingParams;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};
    samplingParams.minLengths = perSlotMinLength;

    // Expected output ids: slots with min length 3 expect ids 1 and 2 at steps 1 and 2, where
    // the other slots expect the end id 0.
    std::vector<std::set<int32_t>> expectedIds{
        {4}, {4}, {4}, {4}, {4}, {4}, // step 0
        {1}, {0}, {0}, {1}, {0}, {1}, // step 1
        {2}, {0}, {0}, {2}, {0}, {2}, // step 2
        {0}, {0}, {0}, {0}, {0}, {0}  // step 3
    };
    this->runTest(expectedIds, samplingParams, endTokenId);
}
|
|
|
|
TYPED_TEST(TopKSamplingLayerTest, TopKBias)
{
    // topK of 2 with the harness bias switched on via useBias.
    uint32_t const sharedTopK = 2;

    SamplingParams samplingParams;
    samplingParams.useBias = true;
    samplingParams.topKs = {sharedTopK};
    samplingParams.topPs = {1.0f};

    // Expected output ids: steps 1 through 3 all expect ids {2, 3}; step 0 expects {4, 5}.
    std::vector<std::set<int32_t>> expectedIds{
        {4, 5}, {4, 5}, {4, 5}, {4, 5}, {4, 5}, {4, 5}, // step 0
        {2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, // step 1
        {2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, // step 2
        {2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}  // step 3
    };
    this->runTest(expectedIds, samplingParams);
}
|
|
|
|
} // namespace
|