mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-05 02:31:33 +08:00
[None][test] Update test case for release (#10763)
Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com>
Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
This commit is contained in:
parent
4df0ca8bd1
commit
4ebc1b1596
@@ -452,7 +452,7 @@ class LongBenchV2(AccuracyTask):
|
||||
EVALUATOR_KWARGS = dict(
|
||||
dataset_path=DATASET_DIR,
|
||||
length="medium",
|
||||
-max_len=120000,
|
||||
+max_input_length=120000,
|
||||
apply_chat_template=True,
|
||||
random_seed=0,
|
||||
)
|
||||
|
||||
@@ -1126,6 +1126,7 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness):
|
||||
task = MMLU(self.MODEL_NAME)
|
||||
task.evaluate(llm)
|
||||
|
||||
+@skip_pre_hopper
|
||||
def test_fp8_vswa_reuse(self):
|
||||
# NOTE: Test with VSWA kv cache config.
|
||||
kv_cache_config = KvCacheConfig(
|
||||
@@ -1140,6 +1141,7 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness):
|
||||
task = MMLU(self.MODEL_NAME)
|
||||
task.evaluate(llm)
|
||||
|
||||
+@skip_pre_hopper
|
||||
@pytest.mark.parametrize("backend", ["xgrammar"])
|
||||
def test_fp8_guided_decoding_vswa_reuse(self, backend: str, mocker):
|
||||
mocker.patch.dict(os.environ, {"TRTLLM_XGUIDANCE_LENIENT": "1"})
|
||||
|
||||
Loading…
Reference in New Issue
Block a user