# TensorRT-LLMs/tests/integration/evaltool/eval_configs/wikilingua.yml

# Describes the model under evaluation and the endpoint used to reach it.
# NOTE(review): <llm-name> and <tokenizer-path> look like placeholders that
# must be substituted before this file is usable — confirm against whatever
# loads or templates this config. They are left as plain scalars on purpose so
# any textual substitution keeps matching.
model:
  model_type: llm
  llm_type: openai-apicompatible
  llm_name: <llm-name>
  container: null
  use_cache: false
  use_chat_endpoint: false
  is_hf_model: false
  tokenizer_path: <tokenizer-path>
  # Endpoint on the local machine, port 12478, under the /v1 path.
  base_url: http://localhost:12478/v1

# Evaluation runs to execute. This file declares a single automatic run that
# uses the lm_eval_harness subtype on the wikilingua_english task.
evaluations:
  - eval_type: 'automatic'
    eval_subtype: 'lm_eval_harness'
    native_args: null
    hf_token: null
    tasks:
      - task_name: 'wikilingua_english'
        task_config: null
        num_fewshot: 3
        batch_size: 4
        bootstrap_iters: 100000
        # NOTE(review): presumably a fraction of the dataset (5%) rather than
        # an absolute sample count — confirm against the harness semantics.
        limit: 0.05
    # Sampling settings: the greedy flag is set, top_k is 1, and top_p and
    # temperature are both near zero, so all four agree on (near-)greedy
    # decoding.
    inference_params:
      use_greedy: true
      top_p: 0.00001
      top_k: 1
      temperature: 0.00001
      # Quoted so the `|` characters can never be mistaken for YAML syntax.
      stop:
        - '<|endoftext|>'
      tokens_to_generate: 256