network_name,perf_case_name,test_name,threshold,absolute_threshold,metric_type,perf_metric
"llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-TensorRT-Perf-1/perf/test_perf.py::test_perf_metric_build_time[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_build_time[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.1,30,BUILD_TIME,143.5976
"llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-TensorRT-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.1,50,INFERENCE_TIME,106778.60992
"llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-TensorRT-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.1,10,SEQ_THROUGHPUT,76.72174
"llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-TensorRT-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.1,10,TOKEN_THROUGHPUT,9820.38162