TensorRT-LLMs/tests/integration/defs/perf/base_perf.csv

6 lines
1.8 KiB
Plaintext

network_name,perf_case_name,test_name,threshold,absolute_threshold,metric_type,perf_metric
"llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-TensorRT-Perf-1/perf/test_perf.py::test_perf_metric_build_time[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_build_time[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.1,30,BUILD_TIME,143.5976
"llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-TensorRT-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.1,50,INFERENCE_TIME,106778.60992
"llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-TensorRT-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.1,10,SEQ_THROUGHPUT,76.72174
"llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-TensorRT-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.1,10,TOKEN_THROUGHPUT,9820.38162