mirror of https://github.com/NVIDIA/TensorRT-LLM.git
* Move TRT-LLM backend repo to TRT-LLM repo
* Address review comments
* debug ci
* Update triton backend
* Fixes after update

Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
26 lines · 643 B · JSON
{
    "parameters": {
        "gpu_device_ids": {
            "string_value": "0"
        },
        "max_beam_width": {
            "string_value": "4"
        },
        "batch_scheduler_policy": {
            "string_value": "guaranteed_no_evict"
        },
        "executor_worker_path": {
            "string_value": "/opt/tritonserver/backends/tensorrtllm/trtllmExecutorWorker"
        },
        "normalize_log_probs": {
            "string_value": "false"
        },
        "gpt_model_type": {
            "string_value": "inflight_fused_batching"
        }
    },
    "model_transaction_policy": {
        "decoupled": true
    }
}
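Note that every backend option under "parameters" is passed as a string via "string_value", even numeric and boolean settings such as max_beam_width and normalize_log_probs, while "model_transaction_policy": {"decoupled": true} marks the model as using Triton's decoupled (streaming) transaction mode. Below is a minimal sketch, assuming Python, of how a consumer could read such a file and coerce the stringly-typed values; the file name config.json and the helper load_backend_params are hypothetical and not part of the repository.

import json

def load_backend_params(path):
    # Hypothetical helper (illustration only): parse the parameter file above
    # and convert "string_value" entries into native Python types.
    with open(path) as f:
        cfg = json.load(f)

    def coerce(raw):
        # Booleans and integers arrive as strings like "false" or "4".
        if raw.lower() in ("true", "false"):
            return raw.lower() == "true"
        try:
            return int(raw)
        except ValueError:
            return raw  # paths and enum-like values stay as strings

    params = {name: coerce(entry["string_value"])
              for name, entry in cfg.get("parameters", {}).items()}
    decoupled = cfg.get("model_transaction_policy", {}).get("decoupled", False)
    return params, decoupled

if __name__ == "__main__":
    params, decoupled = load_backend_params("config.json")
    print(params["max_beam_width"], params["gpt_model_type"], decoupled)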