Revert Eagle3DecodingConfig migrations for the TRT backend tests

Signed-off-by: Venky Ganesh <23023424+venkywonka@users.noreply.github.com>

commit dda588ec23 (parent 438fe8ada3)
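In practice the revert is a one-for-one rename: every `Eagle3DecodingConfig` reference in the TRT-backend tests goes back to `EagleDecodingConfig`, with constructor arguments unchanged, as the hunks below show. A minimal, hypothetical compatibility shim (not part of this commit) for scripts that must run against checkouts on either side of the migration:

# Hypothetical compatibility shim (not part of this commit): resolve the
# Eagle config under whichever name the installed tensorrt_llm exposes.
try:
    from tensorrt_llm.llmapi import EagleDecodingConfig  # name restored by this revert
except ImportError:  # newer checkouts that completed the migration
    from tensorrt_llm.llmapi import Eagle3DecodingConfig as EagleDecodingConfig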
@@ -14,7 +14,7 @@
 # limitations under the License.

 import pytest

-from tensorrt_llm.llmapi import (Eagle3DecodingConfig, LookaheadDecodingConfig,
+from tensorrt_llm.llmapi import (EagleDecodingConfig, LookaheadDecodingConfig,
                                  MedusaDecodingConfig)
 from tensorrt_llm.quantization import QuantAlgo
@@ -476,7 +476,7 @@ class TestVicuna7B(CliFlowAccuracyTestHarness):
             extra_summarize_args.extend(
                 ["--eagle_posterior_threshold=0.09", "--temperature=0.7"])

-        self.run(spec_dec_algo=Eagle3DecodingConfig.decoding_type,
+        self.run(spec_dec_algo=EagleDecodingConfig.decoding_type,
                  extra_convert_args=[
                      f"--eagle_model_dir={self.EAGLE_MODEL_PATH}",
                      "--max_draft_len=63", "--num_eagle_layers=4",
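Note that `decoding_type` is read off the class itself, so the harness can name the speculative-decoding algorithm without building a config instance. A short sketch of that pattern, assuming the sibling config classes expose the same class attribute (only the `EagleDecodingConfig.decoding_type` access is confirmed by the hunk above):

from tensorrt_llm.llmapi import EagleDecodingConfig, MedusaDecodingConfig

# Class-level attribute access: no config instance is needed just to
# identify which speculative-decoding algorithm a test exercises.
for cfg_cls in (EagleDecodingConfig, MedusaDecodingConfig):
    print(cfg_cls.__name__, "->", cfg_cls.decoding_type)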
@@ -503,7 +503,7 @@ class TestVicuna7B(CliFlowAccuracyTestHarness):
         if chunked_context:
             extra_summarize_args.append("--enable_chunked_context")

-        self.run(spec_dec_algo=Eagle3DecodingConfig.decoding_type,
+        self.run(spec_dec_algo=EagleDecodingConfig.decoding_type,
                  extra_convert_args=[
                      f"--eagle_model_dir={self.EAGLE_MODEL_PATH}",
                      "--max_draft_len=63", "--num_eagle_layers=4",
@@ -15,7 +15,7 @@
 import pytest

 from tensorrt_llm._tensorrt_engine import LLM
-from tensorrt_llm.llmapi import (Eagle3DecodingConfig,
+from tensorrt_llm.llmapi import (EagleDecodingConfig,
                                  ExtendedRuntimePerfKnobConfig, KvCacheConfig,
                                  SamplingParams)
 from tensorrt_llm.models.modeling_utils import QuantConfig
@@ -469,7 +469,7 @@ class TestEagleVicuna_7B_v1_3(LlmapiAccuracyTestHarness):
     MODEL_NAME = "lmsys/vicuna-7b-v1.3"
     MODEL_PATH = f"{llm_models_root()}/vicuna-7b-v1.3"

-    speculative_config = Eagle3DecodingConfig(
+    speculative_config = EagleDecodingConfig(
         max_draft_len=63,
         speculative_model_dir=f"{llm_models_root()}/EAGLE-Vicuna-7B-v1.3",
         num_eagle_layers=4,
@@ -495,7 +495,7 @@ class TestEagle2Vicuna_7B_v1_3(LlmapiAccuracyTestHarness):
     MODEL_NAME = "lmsys/vicuna-7b-v1.3"
     MODEL_PATH = f"{llm_models_root()}/vicuna-7b-v1.3"

-    speculative_config = Eagle3DecodingConfig(
+    speculative_config = EagleDecodingConfig(
         max_draft_len=63,
         speculative_model_dir=f"{llm_models_root()}/EAGLE-Vicuna-7B-v1.3",
         num_eagle_layers=4,
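For orientation, the pieces these accuracy tests declare compose as follows. This is a hedged sketch, not the tests' own code: the model paths are placeholder assumptions standing in for the `llm_models_root()` locations above, and the generate-call shape assumes the LLM API's RequestOutput objects with an `.outputs[0].text` field.

# Minimal sketch of wiring an EagleDecodingConfig into the TRT-engine LLM,
# mirroring the fields shown in the hunks above. Paths are placeholders.
from tensorrt_llm._tensorrt_engine import LLM
from tensorrt_llm.llmapi import EagleDecodingConfig, KvCacheConfig

speculative_config = EagleDecodingConfig(
    max_draft_len=63,
    speculative_model_dir="/models/EAGLE-Vicuna-7B-v1.3",  # assumed local path
    num_eagle_layers=4,
)

llm = LLM(
    model="/models/vicuna-7b-v1.3",  # assumed local path
    speculative_config=speculative_config,
    kv_cache_config=KvCacheConfig(enable_block_reuse=True),
)

for output in llm.generate(["The capital of France is"]):
    print(output.outputs[0].text)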
@@ -32,7 +32,7 @@ from tensorrt_llm.bindings import executor as tllm
 from tensorrt_llm.executor import (GenerationExecutorWorker, GenerationRequest,
                                    GenerationResult, LoRARequest,
                                    PromptAdapterRequest, RequestError)
-from tensorrt_llm.llmapi import (BuildCacheConfig, Eagle3DecodingConfig,
+from tensorrt_llm.llmapi import (BuildCacheConfig, EagleDecodingConfig,
                                  KvCacheConfig, KvCacheRetentionConfig,
                                  LookaheadDecodingConfig, MedusaDecodingConfig,
                                  RequestOutput)
@@ -1293,7 +1293,7 @@ def test_llm_api_eagle(**llm_kwargs):

     kv_cache_config = KvCacheConfig(enable_block_reuse=True)

-    speculative_config = Eagle3DecodingConfig(
+    speculative_config = EagleDecodingConfig(
         max_draft_len=63,
         speculative_model_dir=get_model_path("EAGLE-Vicuna-7B-v1.3"),
         num_eagle_layers=4,
@@ -1340,7 +1340,7 @@ def test_llm_api_eagle2(**llm_kwargs):

     kv_cache_config = KvCacheConfig(enable_block_reuse=True)

-    speculative_config = Eagle3DecodingConfig(
+    speculative_config = EagleDecodingConfig(
         max_draft_len=63,
         speculative_model_dir=get_model_path("EAGLE-Vicuna-7B-v1.3"),
         num_eagle_layers=4,
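A hypothetical pytest smoke check (not in this commit) for the reverted surface: it only asserts that the restored symbol imports and carries the class attribute the CLI-flow tests above rely on, skipping cleanly when tensorrt_llm is not installed.

import pytest

def test_eagle_decoding_config_surface():
    # importorskip returns the module, or skips the test if unavailable.
    llmapi = pytest.importorskip("tensorrt_llm.llmapi")
    assert hasattr(llmapi.EagleDecodingConfig, "decoding_type")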