Revert Eagle3DecodingConfig migrations for TRT backend tests

Signed-off-by: Venky Ganesh <23023424+venkywonka@users.noreply.github.com>
This commit is contained in:
Venky Ganesh 2026-01-12 11:31:41 -08:00
parent 438fe8ada3
commit dda588ec23
3 changed files with 9 additions and 9 deletions

View File

@ -14,7 +14,7 @@
# limitations under the License.
import pytest
from tensorrt_llm.llmapi import (Eagle3DecodingConfig, LookaheadDecodingConfig,
from tensorrt_llm.llmapi import (EagleDecodingConfig, LookaheadDecodingConfig,
MedusaDecodingConfig)
from tensorrt_llm.quantization import QuantAlgo
@ -476,7 +476,7 @@ class TestVicuna7B(CliFlowAccuracyTestHarness):
extra_summarize_args.extend(
["--eagle_posterior_threshold=0.09", "--temperature=0.7"])
self.run(spec_dec_algo=Eagle3DecodingConfig.decoding_type,
self.run(spec_dec_algo=EagleDecodingConfig.decoding_type,
extra_convert_args=[
f"--eagle_model_dir={self.EAGLE_MODEL_PATH}",
"--max_draft_len=63", "--num_eagle_layers=4",
@ -503,7 +503,7 @@ class TestVicuna7B(CliFlowAccuracyTestHarness):
if chunked_context:
extra_summarize_args.append("--enable_chunked_context")
self.run(spec_dec_algo=Eagle3DecodingConfig.decoding_type,
self.run(spec_dec_algo=EagleDecodingConfig.decoding_type,
extra_convert_args=[
f"--eagle_model_dir={self.EAGLE_MODEL_PATH}",
"--max_draft_len=63", "--num_eagle_layers=4",

View File

@ -15,7 +15,7 @@
import pytest
from tensorrt_llm._tensorrt_engine import LLM
from tensorrt_llm.llmapi import (Eagle3DecodingConfig,
from tensorrt_llm.llmapi import (EagleDecodingConfig,
ExtendedRuntimePerfKnobConfig, KvCacheConfig,
SamplingParams)
from tensorrt_llm.models.modeling_utils import QuantConfig
@ -469,7 +469,7 @@ class TestEagleVicuna_7B_v1_3(LlmapiAccuracyTestHarness):
MODEL_NAME = "lmsys/vicuna-7b-v1.3"
MODEL_PATH = f"{llm_models_root()}/vicuna-7b-v1.3"
speculative_config = Eagle3DecodingConfig(
speculative_config = EagleDecodingConfig(
max_draft_len=63,
speculative_model_dir=f"{llm_models_root()}/EAGLE-Vicuna-7B-v1.3",
num_eagle_layers=4,
@ -495,7 +495,7 @@ class TestEagle2Vicuna_7B_v1_3(LlmapiAccuracyTestHarness):
MODEL_NAME = "lmsys/vicuna-7b-v1.3"
MODEL_PATH = f"{llm_models_root()}/vicuna-7b-v1.3"
speculative_config = Eagle3DecodingConfig(
speculative_config = EagleDecodingConfig(
max_draft_len=63,
speculative_model_dir=f"{llm_models_root()}/EAGLE-Vicuna-7B-v1.3",
num_eagle_layers=4,

View File

@ -32,7 +32,7 @@ from tensorrt_llm.bindings import executor as tllm
from tensorrt_llm.executor import (GenerationExecutorWorker, GenerationRequest,
GenerationResult, LoRARequest,
PromptAdapterRequest, RequestError)
from tensorrt_llm.llmapi import (BuildCacheConfig, Eagle3DecodingConfig,
from tensorrt_llm.llmapi import (BuildCacheConfig, EagleDecodingConfig,
KvCacheConfig, KvCacheRetentionConfig,
LookaheadDecodingConfig, MedusaDecodingConfig,
RequestOutput)
@ -1293,7 +1293,7 @@ def test_llm_api_eagle(**llm_kwargs):
kv_cache_config = KvCacheConfig(enable_block_reuse=True)
speculative_config = Eagle3DecodingConfig(
speculative_config = EagleDecodingConfig(
max_draft_len=63,
speculative_model_dir=get_model_path("EAGLE-Vicuna-7B-v1.3"),
num_eagle_layers=4,
@ -1340,7 +1340,7 @@ def test_llm_api_eagle2(**llm_kwargs):
kv_cache_config = KvCacheConfig(enable_block_reuse=True)
speculative_config = Eagle3DecodingConfig(
speculative_config = EagleDecodingConfig(
max_draft_len=63,
speculative_model_dir=get_model_path("EAGLE-Vicuna-7B-v1.3"),
num_eagle_layers=4,