TensorRT-LLM/examples/bert/large_with_attention_plugin_benchmark/config.json
2023-09-20 00:29:41 -07:00

23 lines
560 B
JSON

{
"builder_config": {
"max_batch_size": 256,
"max_input_len": 512,
"name": "bert",
"precision": "float16",
"tensor_parallel": 1,
"use_refit": false
},
"plugin_config": {
"bert_attention_plugin": "float16",
"context_fmha_enabled": true,
"gemm_plugin": "float16",
"gpt_attention_plugin": false,
"identity_plugin": false,
"layernorm_plugin": false,
"layernorm_quantization_plugin": false,
"nccl_plugin": false,
"smooth_quant_gemm_plugin": false,
"weight_only_quant_matmul_plugin": false
}
}