mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
* add deepseek-r1 reasoning parser Signed-off-by: pansicheng <sicheng.pan.chn@gmail.com> * fix test Signed-off-by: Pengyun Lin <81065165+LinPoly@users.noreply.github.com> --------- Signed-off-by: pansicheng <sicheng.pan.chn@gmail.com> Signed-off-by: Pengyun Lin <81065165+LinPoly@users.noreply.github.com> Co-authored-by: Pengyun Lin <81065165+LinPoly@users.noreply.github.com>
12 lines
356 B
Bash
12 lines
356 B
Bash
#!/usr/bin/env bash
#
# Launch `trtllm-serve` for the deepseek-ai/DeepSeek-R1 model with the
# `deepseek-r1` reasoning parser selected.
#
# The command passes:
#   - host/port:  localhost:8000
#   - backend:    pytorch
#   - limits:     --max_batch_size 161, --max_num_tokens 1160
#   - layout:     --tp_size 8, --ep_size 8, --pp_size 1
#   - KV cache:   --kv_cache_free_gpu_memory_fraction 0.95
#   - extra opts: ./extra-llm-api-config.yml
#
# NOTE(review): the extra-options file is given as a relative path, so the
# script presumably has to be run from the directory that contains
# extra-llm-api-config.yml -- confirm.
# NOTE(review): tp_size/ep_size of 8 presumably assume an 8-GPU node -- confirm.

trtllm-serve \
  deepseek-ai/DeepSeek-R1 \
  --host localhost --port 8000 \
  --backend pytorch \
  --max_batch_size 161 --max_num_tokens 1160 \
  --tp_size 8 --ep_size 8 --pp_size 1 \
  --kv_cache_free_gpu_memory_fraction 0.95 \
  --extra_llm_api_options ./extra-llm-api-config.yml \
  --reasoning_parser deepseek-r1