# TensorRT-LLMs/tests/integration/evaltool/eval_configs/mt_bench.yml

# Settings for the `model` entry of this config.
# NOTE(review): the key names suggest an OpenAI-API-compatible endpoint that
# is reached over HTTP on localhost -- confirm against the consuming tool.
model:
  model_type: 'llm'
  llm_type: 'openai-apicompatible'
  llm_name: 'Llama-2-7b-chat-hf'
  container: null
  use_cache: false
  use_chat_endpoint: false
  # Explicit empty string (not null).
  chat_template: ''

  is_hf_model: false
  # Quoted so the colons in the URL can never be misread by a parser.
  base_url: 'http://localhost:12478/v1'

# Evaluation runs to perform; this file defines a single entry.
evaluations:
  - eval_type: 'llm_as_a_judge'
    eval_subtype: 'mtbench'
    bench_name: 'mt_bench'
    mode: 'single'
    # The leading space inside the string is part of the value; keep it.
    # NOTE(review): presumably appended verbatim to an underlying command
    # line -- confirm with the consuming tool.
    native_args: ' --num-gpus-total 8 --num-gpus-per-model 8'
    evaluation_stages:
      - 'generation'
      - 'judgement'
    # Sampling settings for the generation side.
    # NOTE(review): top_k of 1 with near-zero top_p/temperature presumably
    # makes decoding effectively greedy -- confirm intent.
    inference_params:
      top_p: 0.00001
      top_k: 1
      temperature: 0.00001
      stop: []
      tokens_to_generate: 1024
    # Connection settings for the judge model entry.
    judge_model:
      model_type: 'llm'
      llm_type: 'nvidia-llmgateway'
      llm_name: 'gpt-4'
      use_cache: false
      use_chat_endpoint: true
      base_url: 'https://prod.api.nvidia.com/llm/v1/azure/'
      # Quoted so the value stays a string regardless of parser date rules.
      api_version: '2023-12-01-preview'
      # Placeholder values. NOTE(review): presumably replaced with real
      # credentials at run time -- never commit real secrets here.
      client_id: 'xxx'
      client_secret: 'xxx'
      scope: 'azureopenai-readwrite'
      grant_type: 'client_credentials'
    # Sampling settings for the judge side; same values as inference_params.
    judge_inference_params:
      top_p: 0.00001
      top_k: 1
      temperature: 0.00001
      stop: []
      tokens_to_generate: 1024