# Mirror of https://github.com/NVIDIA/TensorRT-LLM.git
# Synced 2026-02-01 08:41:13 +08:00
# 47 lines, 1.1 KiB, YAML
# MT-Bench "LLM as a judge" evaluation configuration.
#
# NOTE(review): the nesting below was reconstructed from key order and the
# parallel key names (model/judge_model, inference_params/
# judge_inference_params) because the copy this was restored from had lost
# its indentation. Confirm against the upstream file before relying on it.

# Model under evaluation, reached through an OpenAI-compatible endpoint.
model:
  model_type: llm
  llm_type: openai-apicompatible
  llm_name: Llama-2-7b-chat-hf
  container: null
  use_cache: false
  use_chat_endpoint: false
  chat_template: ""

  is_hf_model: false
  base_url: http://localhost:12478/v1

evaluations:
  - eval_type: llm_as_a_judge
    eval_subtype: mtbench
    bench_name: mt_bench
    mode: single
    # The leading space is part of the value; keep it quoted.
    native_args: " --num-gpus-total 8 --num-gpus-per-model 8"
    evaluation_stages:
      - generation
      - judgement
    # Sampling settings for the evaluated model. With top_k 1 and
    # near-zero top_p/temperature this is presumably meant to be
    # effectively greedy decoding -- TODO confirm with the consumer.
    inference_params:
      top_p: 0.00001
      top_k: 1
      temperature: 0.00001
      stop: []
      tokens_to_generate: 1024
    # Model that scores the generated answers.
    judge_model:
      model_type: llm
      llm_type: nvidia-llmgateway
      llm_name: gpt-4
      use_cache: false
      use_chat_endpoint: true
      base_url: https://prod.api.nvidia.com/llm/v1/azure/
      api_version: 2023-12-01-preview
      # "xxx" values are placeholders; supply real credentials at run time
      # and do not commit them.
      client_id: xxx
      client_secret: xxx
      scope: azureopenai-readwrite
      grant_type: client_credentials
    # Sampling settings for the judge; same values as inference_params.
    judge_inference_params:
      top_p: 0.00001
      top_k: 1
      temperature: 0.00001
      stop: []
      tokens_to_generate: 1024