TensorRT-LLMs/tests/integration/defs/perf/base_perf_pytorch.csv
Eran Geva d4b3bae5af
[#8391][fix] check perf by device subtype (#8428)
Signed-off-by: Eran Geva <19514940+MrGeva@users.noreply.github.com>
2025-10-22 12:38:05 +03:00

7.7 KiB

1network_nameperf_case_nametest_namethresholdabsolute_thresholdmetric_typeperf_metricdevice_subtype
2llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]0.205000INFERENCE_TIME109007.96
3llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]-0.205SEQ_THROUGHPUT76.45
4llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]-0.20500TOKEN_THROUGHPUT9785.75
5llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]0.202KV_CACHE_SIZE55.64
6llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]0.205000INFERENCE_TIME171845.02H100_PCIe
7llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]0.202KV_CACHE_SIZE57.17H100_PCIe
8llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]-0.205SEQ_THROUGHPUT48.09H100_PCIe
9llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]-0.20500TOKEN_THROUGHPUT6155.59H100_PCIe
10llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100_NVL-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]0.205000INFERENCE_TIME139897.82H100_NVL
11llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100_NVL-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]0.202KV_CACHE_SIZE69.59H100_NVL
12llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100_NVL-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]-0.205SEQ_THROUGHPUT58.63H100_NVL
13llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100_NVL-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]-0.20500TOKEN_THROUGHPUT7504.07H100_NVL
14llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]0.205000INFERENCE_TIME125068.76H100
15llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]0.202KV_CACHE_SIZE57.09H100
16llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]-0.205SEQ_THROUGHPUT65.50H100
17llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192H100-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]-0.20500TOKEN_THROUGHPUT8384.00H100