mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
23 lines
560 B
JSON
23 lines
560 B
JSON
{
  "builder_config": {
    "max_batch_size": 256,
    "max_input_len": 512,
    "name": "bert",
    "precision": "float16",
    "tensor_parallel": 1,
    "use_refit": false
  },
  "plugin_config": {
    "bert_attention_plugin": "float16",
    "context_fmha_enabled": true,
    "gemm_plugin": "float16",
    "gpt_attention_plugin": false,
    "identity_plugin": false,
    "layernorm_plugin": false,
    "layernorm_quantization_plugin": false,
    "nccl_plugin": false,
    "smooth_quant_gemm_plugin": false,
    "weight_only_quant_matmul_plugin": false
  }
}