diff --git a/tests/integration/defs/accuracy/accuracy_core.py b/tests/integration/defs/accuracy/accuracy_core.py index e30c6e2c2c..d7b885ad74 100644 --- a/tests/integration/defs/accuracy/accuracy_core.py +++ b/tests/integration/defs/accuracy/accuracy_core.py @@ -452,7 +452,7 @@ class LongBenchV2(AccuracyTask): EVALUATOR_KWARGS = dict( dataset_path=DATASET_DIR, length="medium", - max_len=120000, + max_input_length=120000, apply_chat_template=True, random_seed=0, ) diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py index 76b1f7bb02..31446cd410 100644 --- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py +++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py @@ -1126,6 +1126,7 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness): task = MMLU(self.MODEL_NAME) task.evaluate(llm) + @skip_pre_hopper def test_fp8_vswa_reuse(self): # NOTE: Test with VSWA kv cache config. kv_cache_config = KvCacheConfig( @@ -1140,6 +1141,7 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness): task = MMLU(self.MODEL_NAME) task.evaluate(llm) + @skip_pre_hopper @pytest.mark.parametrize("backend", ["xgrammar"]) def test_fp8_guided_decoding_vswa_reuse(self, backend: str, mocker): mocker.patch.dict(os.environ, {"TRTLLM_XGUIDANCE_LENIENT": "1"})