mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
* feat: Variable-Beam-Width-Search Part2 Signed-off-by: wili-65535 <wili-65535@user.noreply.github.com> * feat: Variable-Beam-Width-Search Part2 Signed-off-by: wili-65535 <wili-65535@user.noreply.github.com> * feat: Variable-Beam-Width-Search Part2, fix CPP tests Signed-off-by: wili-65535 <wili-65535@user.noreply.github.com> * feat: Variable-Beam-Width-Search Part3, simplify CPP tests Signed-off-by: wili-65535 <wili-65535@user.noreply.github.com> * feat: Variable-Beam-Width-Search Part4, move beam_width_array param Signed-off-by: wili-65535 <wili-65535@user.noreply.github.com> * feat: Variable-Beam-Width-Search, fix CI error Signed-off-by: wili-65535 <wili-65535@user.noreply.github.com> * feat: Variable-Beam-Width-Search part2 Signed-off-by: wili-65535 <wili-65535@user.noreply.github.com> * feat: Variable-Beam-Width-Search part2 Signed-off-by: wili-65535 <wili-65535@user.noreply.github.com> * feat: Variable-Beam-Width-Search part2, fix pre-commit Signed-off-by: wili-65535 <wili-65535@user.noreply.github.com> * feat: Variable-Beam-Width-Search part2, fix review Signed-off-by: wili-65535 <wili-65535@user.noreply.github.com> --------- Signed-off-by: wili-65535 <wili-65535@user.noreply.github.com> Co-authored-by: wili-65535 <wili-65535@user.noreply.github.com>
143 lines
3.6 KiB
YAML
143 lines
3.6 KiB
YAML
# Reference signature data: for each method, every parameter is listed with
# its type annotation and its default value, followed by the return annotation.
# NOTE(review): nesting was restored from a flattened copy of this file;
# confirm it against the consumer's expected schema.
methods:
  __init__:
    parameters:
      # General
      n:
        annotation: int
        default: 1
      best_of:
        annotation: Optional[int]
        default: null
      use_beam_search:
        annotation: bool
        default: false
      beam_search_diversity_rate:
        annotation: Optional[float]
        default: null
      early_stopping:
        annotation: Optional[int]
        default: null
      max_tokens:
        annotation: int
        default: 32
      min_tokens:
        annotation: Optional[int]
        default: null
      end_id:
        annotation: Optional[int]
        default: null
      pad_id:
        annotation: Optional[int]
        default: null
      # Sampling
      seed:
        annotation: Optional[int]
        default: null
      temperature:
        annotation: Optional[float]
        default: null
      top_k:
        annotation: Optional[int]
        default: null
      top_p:
        annotation: Optional[float]
        default: null
      top_p_decay:
        annotation: Optional[float]
        default: null
      top_p_min:
        annotation: Optional[float]
        default: null
      top_p_reset_ids:
        annotation: Optional[int]
        default: null
      min_p:
        annotation: Optional[float]
        default: null
      # Penalties
      repetition_penalty:
        annotation: Optional[float]
        default: null
      presence_penalty:
        annotation: Optional[float]
        default: null
      frequency_penalty:
        annotation: Optional[float]
        default: null
      length_penalty:
        annotation: Optional[float]
        default: null
      no_repeat_ngram_size:
        annotation: Optional[int]
        default: null
      # Stop words and bad words
      stop:
        annotation: Union[List[str], str, NoneType]
        default: null
      stop_token_ids:
        annotation: Optional[List[int]]
        default: null
      include_stop_str_in_output:
        annotation: bool
        default: false
      bad:
        annotation: Union[List[str], str, NoneType]
        default: null
      bad_token_ids:
        annotation: Optional[List[int]]
        default: null
      # Logits processor and guided decoding
      logits_processor:
        annotation: Optional[tensorrt_llm.sampling_params.LogitsProcessor]
        default: null
      apply_batched_logits_processor:
        annotation: bool
        default: false
      guided_decoding:
        annotation: Optional[tensorrt_llm.sampling_params.GuidedDecodingParams]
        default: null
      embedding_bias:
        annotation: Optional[torch.Tensor]
        default: null
      # Speculative decoding
      lookahead_config:
        annotation: Optional[tensorrt_llm.bindings.executor.LookaheadDecodingConfig]
        default: null
      # Tokenizer behavior
      ignore_eos:
        annotation: bool
        default: false
      detokenize:
        annotation: bool
        default: true
      add_special_tokens:
        annotation: bool
        default: true
      truncate_prompt_tokens:
        annotation: Optional[int]
        default: null
      skip_special_tokens:
        annotation: bool
        default: true
      spaces_between_special_tokens:
        annotation: bool
        default: true
      # Returning controls
      return_log_probs:
        annotation: bool
        default: false
      return_context_logits:
        annotation: bool
        default: false
      return_generation_logits:
        annotation: bool
        default: false
      exclude_input_from_output:
        annotation: bool
        default: true
      return_encoder_output:
        annotation: bool
        default: false
    return_annotation: None
properties: {}