mirror of https://github.com/NVIDIA/TensorRT-LLM.git
* Move TRT-LLM backend repo to TRT-LLM repo
* Address review comments
* debug ci
* Update triton backend
* Fixes after update

Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
26 lines · 643 B · JSON
{
    "parameters": {
        "gpu_device_ids": {
            "string_value": "0"
        },
        "max_beam_width": {
            "string_value": "4"
        },
        "batch_scheduler_policy": {
            "string_value": "guaranteed_no_evict"
        },
        "executor_worker_path": {
            "string_value": "/opt/tritonserver/backends/tensorrtllm/trtllmExecutorWorker"
        },
        "normalize_log_probs": {
            "string_value": "false"
        },
        "gpt_model_type": {
            "string_value": "inflight_fused_batching"
        }
    },
    "model_transaction_policy": {
        "decoupled": true
    }
}
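Note that every backend option under "parameters" is passed as a string via "string_value", even numeric and boolean settings such as max_beam_width and normalize_log_probs, while "model_transaction_policy": {"decoupled": true} marks the model as using Triton's decoupled (streaming) transaction mode. Below is a minimal sketch, assuming Python, of how a consumer could read such a file and coerce the stringly-typed values; the file name config.json and the helper load_backend_params are hypothetical and not part of the repository.

import json

def load_backend_params(path):
    # Hypothetical helper (illustration only): parse the parameter file above
    # and convert "string_value" entries into native Python types.
    with open(path) as f:
        cfg = json.load(f)

    def coerce(raw):
        # Booleans and integers arrive as strings like "false" or "4".
        if raw.lower() in ("true", "false"):
            return raw.lower() == "true"
        try:
            return int(raw)
        except ValueError:
            return raw  # paths and enum-like values stay as strings

    params = {name: coerce(entry["string_value"])
              for name, entry in cfg.get("parameters", {}).items()}
    decoupled = cfg.get("model_transaction_policy", {}).get("decoupled", False)
    return params, decoupled

if __name__ == "__main__":
    params, decoupled = load_backend_params("config.json")
    print(params["max_beam_width"], params["gpt_model_type"], decoupled)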