mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
* committed APIs validation Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fix Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * clean name Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * separate Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * add TODOs Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fix naming Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fix Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> --------- Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com>
30 lines
1.1 KiB
Python
30 lines
1.1 KiB
Python
from pathlib import Path
|
|
from typing import Any, Literal, Optional, Union
|
|
|
|
from transformers import PreTrainedTokenizerBase
|
|
|
|
from ..llmapi.llm import LLM as BaseLLM
|
|
from ..llmapi.llm import TokenizerBase
|
|
|
|
|
|
class LLM(BaseLLM):
    """Convenience subclass of ``BaseLLM`` pinned to the PyTorch backend.

    Accepts the same constructor arguments as ``BaseLLM`` and forwards them
    unchanged, except that the ``backend`` keyword argument is always set to
    ``'pytorch'`` (overriding any caller-supplied value).
    """

    def __init__(self,
                 model: str,
                 tokenizer: Optional[Union[str, Path, TokenizerBase,
                                           PreTrainedTokenizerBase]] = None,
                 tokenizer_mode: Literal['auto', 'slow'] = 'auto',
                 skip_tokenizer_init: bool = False,
                 trust_remote_code: bool = False,
                 tensor_parallel_size: int = 1,
                 dtype: str = "auto",
                 revision: Optional[str] = None,
                 tokenizer_revision: Optional[str] = None,
                 **kwargs: Any):
        # ``**kwargs`` is a fresh dict local to this call, so it is safe to
        # mutate in place. Force the PyTorch backend regardless of what the
        # caller passed.
        kwargs['backend'] = 'pytorch'
        super().__init__(model, tokenizer, tokenizer_mode, skip_tokenizer_init,
                         trust_remote_code, tensor_parallel_size, dtype,
                         revision, tokenizer_revision, **kwargs)
|