TensorRT-LLMs/tests/integration/defs/.test_durations
Emma Qiao 2f48985b9c
infra: Add step to generate new duration file (#3298)
* Add step to generate new duration file

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>

* Install python in earlier step

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>

* Clone repo and add debug info

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>

* Remove debug info and only generate duration for post-merge

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>

* Test for the new duration file

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>

* Update the duration file format

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>

* Move generate_duration.py to scripts folder and add try-catch avoiding any broken

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>

---------

Signed-off-by: EmmaQiaoCh <qqiao@nvidia.com>
2025-04-18 12:56:31 +08:00

401 lines
47 KiB
Plaintext

{
"examples/test_llama.py::test_llm_llama_v3_2_smoothquant_1node_single_gpu[llama-3.2-1b]": 179.50230779591948,
"examples/test_qwen.py::test_llm_qwen_int4_single_gpu_summary[qwen_7b_chat_int4-nb:4]": 550.7549150055274,
"examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_python_runtime]": 200.3679922982119,
"test_accuracy.py::test_accuracy_gpt[gpt-use-int4-weight-only-quant]": 124.97977197915316,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm.py -m \"part0\"]": 1722.5026792194694,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/llmapi/test_mpi_session.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/trt/python_plugin unittest/tools unittest/utils unittest/others]": 745.9777404600754,
"examples/test_gpt.py::test_llm_starcoder2_sqootb_single_gpu[starcoder2]": 548.202868571505,
"examples/test_llama.py::test_llm_llama_int8_sq_ootb_1gpu_summary[llama-7b-nb:1]": 473.6016059508547,
"examples/test_llama.py::test_llm_llama_v3_1_quantization_1gpu_manage_weights[llama-3.1-8b-int4_wo]": 362.807158597745,
"examples/test_qwen.py::test_llm_qwen_int4_single_gpu_summary[qwen2_7b_awq-nb:1]": 534.9901599064469,
"examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_cpp_runtime]": 215.76758538931608,
"test_accuracy.py::test_accuracy_gpt[gpt-smooth-quant]": 160.11085808090866,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_models.py -m \"part0\"]": 1467.010510167107,
"examples/test_llama.py::test_llm_llama_smooth_quant_1gpu_summary[float16-llama-7b-enable_ptpc-nb:4]": 474.5004307180643,
"examples/test_llama.py::test_llm_llama_v3_int8_gptq_1gpu_summary[llama-v3-8b-instruct-hf-float16-nb:1]": 564.3716260530055,
"test_accuracy.py::test_accuracy_gpt[gpt-smooth-quant-per-token-per-channel]": 195.74817313067615,
"test_accuracy.py::test_accuracy_gpt[gpt-use-int8-kv-cache]": 152.01638494804502,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_models.py -m \"part1\"]": 702.5636156499386,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_models.py -m \"not (part0 or part1)\"]": 573.0894306898117,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm.py -m \"not part0\"]": 2185.3989118468016,
"examples/test_llama.py::test_llm_llama_int8_kv_1gpu_summary[llama-7b-enable_weight_only-nb:4]": 285.5400769393891,
"examples/test_llama.py::test_llm_llama_int8_kv_awq_1gpu_summary[llama-7b-nb:4]": 753.5694852620363,
"examples/test_llama.py::test_llm_llama_v3_1_quantization_1gpu_manage_weights[llama-3.1-8b-int8_sq]": 229.33236890286207,
"test_accuracy.py::test_accuracy_gpt[gpt-use-int8-weight-only-quant]": 138.9683292657137,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_executor.py]": 384.39576085843146,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_quant.py]": 347.35647444298957,
"test_accuracy.py::test_accuracy_gpt_next[gpt-next]": 137.4397995332256,
"test_unittests.py::test_unittests_v2[unittest/trt/model_api/test_model_level_api.py]": 25.342884634970687,
"test_unittests.py::test_unittests_v2[unittest/trt/model_api/test_model_quantization.py]": 157.4453641991131,
"test_accuracy.py::test_accuracy_santacoder[santacoder-context-fmha-enabled]": 134.59484098665416,
"test_unittests.py::test_unittests_v2[unittest/trt/model/eagle]": 145.5690604839474,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt_e2e.py]": 618.984766190988,
"test_unittests.py::test_unittests_v2[unittest/trt/model_api/test_model_api_multi_gpu.py]": 26.751821771264076,
"test_accuracy.py::test_accuracy_gpt[gpt-weight-streaming-ootb]": 152.701959496364,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"partition2\"]": 92.527716698125,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_llama.py]": 2248.572680544108,
"disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]": 89.62453198479488,
"test_accuracy.py::test_accuracy_gpt[gpt-paged-kv-cache]": 111.11273829906713,
"test_e2e.py::test_llmapi_quickstart_atexit": 95.69416327599902,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"partition3\"]": 90.72128840722144,
"test_unittests.py::test_unittests_v2[unittest/trt/functional]": 554.7785050286911,
"test_accuracy.py::test_accuracy_eagle[eagle-vicuna-7b-cuda-graph-chunked-context-EAGLE-Vicuna-7B-v1.3]": 198.61143697658554,
"test_accuracy.py::test_accuracy_medusa[medusa-vicuna-7b-medusa-vicuna-7b-v1.3]": 198.49211270920932,
"test_accuracy.py::test_accuracy_santacoder[santacoder-context-fmha-fp32-acc-enabled]": 143.5208842480206,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"xqa_generic\"]": 757.2779654711485,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_perf_evaluator.py]": 105.96611958928406,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_falcon.py]": 83.617671193555,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gptj.py]": 148.871768290177,
"disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]": 121.10945729538798,
"examples/test_llama.py::test_llm_llama_v1_1gpu[llama-7b-float16-inference-disable_weight_only_groupwise_quant_matmul_plugin-nb:1]": 208.22942463308573,
"test_accuracy.py::test_accuracy_eagle[eagle-vicuna-7b-cuda-graph-EAGLE-Vicuna-7B-v1.3]": 253.0358850120101,
"test_accuracy.py::test_accuracy_gpt[gpt-remove-padding-beam-search]": 114.19244751892984,
"test_accuracy.py::test_accuracy_mamba[mamba-130m]": 103.87942005699733,
"test_accuracy.py::test_accuracy_medusa[medusa-vicuna-7b-cuda-graph-medusa-vicuna-7b-v1.3]": 188.91934169409797,
"test_cpp.py::test_model[mamba-86]": 927.5912847034633,
"test_e2e.py::test_llmapi_exit": 30.026744440197945,
"test_e2e.py::test_openai_chat_example": 803.0726698227227,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_build_cache.py]": 31.214379638433456,
"examples/test_llama.py::test_llm_llama_1gpu[llama-v2-7b-hf-disable_fp8-float16-inference-nb:1]": 218.79535200074315,
"test_accuracy.py::test_accuracy_gpt[gpt-mmha-multi-block-mode]": 98.81222484912723,
"test_cache.py::test_cache_sanity": 0.0005080550909042358,
"test_e2e.py::test_build_time_benchmark_sanity": 87.00242731813341,
"test_e2e.py::test_gpt3_175b_1layers_build_only": 140.5419584736228,
"test_e2e.py::test_llmapi_load_ckpt_from_convert_command": 218.90925028920174,
"test_e2e.py::test_llmapi_load_engine_from_build_command_with_lora[llama-llama-models-v2/llama-v2-7b-hf]": 197.1922948397696,
"test_e2e.py::test_trtllm_bench_request_rate_and_concurrency[enable_concurrency]": 314.65844955900684,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"partition1\"]": 93.49747322313488,
"test_unittests.py::test_unittests_v2[unittest/bindings]": 987.0220801904798,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_mamba.py]": 81.90508931875229,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-BertModel-bert/bert-base-uncased]": 124.03865230083466,
"examples/test_gpt.py::test_llm_gpt2_starcoder2[float16-tp1]": 132.7881091721356,
"test_accuracy.py::test_accuracy_gpt[gpt-context-fmha-enabled]": 289.3652268294245,
"test_accuracy.py::test_accuracy_lookahead[lookahead-vicuna-7b-vicuna-7b-v1.3]": 193.08203971700277,
"test_cpp.py::test_model[medusa-86]": 559.363546602428,
"test_e2e.py::test_llmapi_example_quantization": 747.6924076671712,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"partition0\"]": 97.04704199638218,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention_IFB.py]": 112.25791454897262,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_mistral.py]": 335.7435329668224,
"test_unittests.py::test_unittests_v2[unittest/test_model_runner_cpp.py]": 1020.8425698600477,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-130m-float16-enable_gemm_plugin]": 338.6163583025336,
"test_accuracy.py::test_accuracy_eagle[eagle-vicuna-7b-cuda-graph-typical-acceptance-EAGLE-Vicuna-7B-v1.3]": 294.03945077583194,
"test_accuracy.py::test_accuracy_gpt[gpt-cuda-graph]": 97.79930868372321,
"test_accuracy.py::test_accuracy_gpt[gpt-remove-padding]": 117.79665404598927,
"test_cpp.py::test_model[recurrentgemma-86]": 318.2166636362672,
"test_e2e.py::test_llmapi_chat_example": 107.37903925031424,
"test_e2e.py::test_llmapi_server_example": 103.72746162861586,
"test_e2e.py::test_openai_misc_example": 252.866087988019,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_utils.py]": 95.8807710558176,
"test_unittests.py::test_unittests_v2[unittest/trt/quantization]": 1397.4090217519552,
"disaggregated/test_disaggregated.py::test_disaggregated_overlap[TinyLlama-1.1B-Chat-v1.0]": 54.34289716929197,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-enable_gemm_plugin]": 578.6178857460618,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-enable_gemm_plugin]": 108.6528849825263,
"test_accuracy.py::test_accuracy_eagle[eagle-vicuna-7b-EAGLE-Vicuna-7B-v1.3]": 246.2644872769015,
"test_accuracy.py::test_accuracy_gpt[gpt-beam-search]": 102.1895511681214,
"test_cpp.py::test_model[eagle-86]": 469.773057743907,
"test_cpp.py::test_model[redrafter-86]": 307.86266888678074,
"test_e2e.py::test_llmapi_load_engine_from_build_command[llama-codellama/CodeLlama-7b-Instruct-hf]": 250.28181543946266,
"test_e2e.py::test_llmapi_load_engine_from_build_command[llama-llama-models/llama-7b-hf]": 190.90881541371346,
"test_unittests.py::test_unittests_v2[unittest/api_stability]": 32.895440101623535,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention_no_cache.py]": 48.11189826577902,
"examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]": 98.66860056994483,
"test_e2e.py::test_llmapi_example_customize": 130.17279473505914,
"test_e2e.py::test_llmapi_example_guided_decoding": 117.32164042396471,
"test_e2e.py::test_llmapi_example_inference": 128.04176313988864,
"test_e2e.py::test_llmapi_example_inference_async": 116.29321905039251,
"test_e2e.py::test_llmapi_example_inference_async_streaming": 105.0340089709498,
"test_e2e.py::test_llmapi_example_logits_processor": 107.31057577906176,
"test_e2e.py::test_llmapi_example_lookahead_decoding": 122.74548233486712,
"test_e2e.py::test_llmapi_example_multilora": 122.96416146494448,
"test_e2e.py::test_llmapi_quickstart": 107.21513352356851,
"examples/test_bert.py::test_llm_bert_general[compare_hf-disable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:1-pp:1-float32-BertModel-bert/bert-base-uncased]": 103.17987485975027,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:1-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion]": 121.76734105497599,
"examples/test_gemma.py::test_llm_gemma_1gpu_mmlu[gemma-2b-it-flax-other-bfloat16-8]": 334.2920230771415,
"examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-2b-torch-other-bfloat16-8]": 271.14085192000493,
"examples/test_gpt.py::test_llm_gpt2_1gpu[enable_gemm_plugin-disable_attention_plugin]": 157.16826730407774,
"examples/test_gpt.py::test_llm_gpt2_starcoder2[bfloat16-tp1]": 193.97186516784132,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-130m-float16-disable_gemm_plugin]": 124.34539587795734,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-disable_gemm_plugin]": 123.01568439602852,
"examples/test_medusa.py::test_llm_medusa_1gpu[use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs8]": 314.9849800863303,
"test_accuracy.py::test_accuracy_gptj[gptj-cyclic-and-paged-kv-cache]": 173.45949043799192,
"test_accuracy.py::test_accuracy_gptj[gptj-cyclic-kv-cache-beam-search]": 231.8624299732037,
"test_accuracy.py::test_accuracy_gptj[gptj-mmha-multi-block-mode]": 182.0765182878822,
"test_e2e.py::test_gpt_fp32[use_cpp_session]": 102.94305092096329,
"test_e2e.py::test_gpt_fp32[use_py_session-multi_query_mode]": 101.54258136451244,
"test_e2e.py::test_gpt_fp32[use_py_session]": 100.1567601710558,
"test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding]": 160.73923111706972,
"test_e2e.py::test_llama_e2e[use_py_session-remove_input_padding]": 143.2220597937703,
"test_e2e.py::test_llama_e2e[use_py_session]": 145.43956408649683,
"test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding]": 141.839773863554,
"examples/test_bert.py::test_llm_bert_general[compare_hf-disable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:1-pp:1-float32-RobertaModel-bert/roberta-base]": 114.17011476494372,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-RobertaModel-bert/roberta-base]": 124.41121347621083,
"examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-2b-it-flax-wo_int4-bfloat16-8]": 432.4165447750129,
"examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-2b-wo_int8-bfloat16-8]": 225.77532899565995,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-disable_gemm_plugin]": 388.2712464146316,
"examples/test_medusa.py::test_llm_medusa_1gpu[use_py_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs8]": 364.87489057704806,
"examples/test_redrafter.py::test_llm_redrafter_1gpu[use_cpp_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb5-bs8]": 401.16773408837616,
"examples/test_redrafter.py::test_llm_redrafter_1gpu[use_cpp_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb8-bs8]": 342.8972608819604,
"examples/test_redrafter.py::test_llm_redrafter_1gpu[use_py_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb5-bs8]": 399.8077963553369,
"examples/test_redrafter.py::test_llm_redrafter_1gpu[use_py_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb8-bs8]": 385.8487260323018,
"test_accuracy.py::test_accuracy_gpt[gpt-context-fmha-disabled]": 96.56836012890562,
"test_accuracy.py::test_accuracy_gptj[gptj-context-fmha-enabled]": 227.20399192301556,
"test_accuracy.py::test_accuracy_gptj[gptj-cyclic-kv-cache]": 168.16926325811073,
"test_e2e.py::test_gpt_fp32[use_cpp_session-multi_query_mode]": 102.81246098689735,
"test_e2e.py::test_mistral_e2e[use_py_session-remove_input_padding]": 178.69259701482952,
"test_e2e.py::test_mistral_e2e[use_py_session]": 158.53167643211782,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_bert_attention.py]": 99.79678613925353,
"examples/test_llama.py::test_llm_llama_v2_1gpu_sparsity[llama-v2-7b-hf-enable_weight_sparsity]": 245.13967342674732,
"examples/test_prompt_lookup.py::test_llm_prompt_lookup_1gpu[streaming-gpt2-use_cpp_session-use_tokens-max_matching_ngram_size_2-prompt_lookup_num_tokens_8-float16-bs1]": 177.83264934271574,
"test_cpp.py::test_model[gpt-80]": 3140.499032407999,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_mllama\"]": 562.70900757052,
"test_unittests.py::test_unittests_v2[unittest/_torch -k \"not (modeling or multi_gpu or auto_deploy)\"]": 646.2237322237343,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[no_streaming-gpt2-use_cpp_session-use_logits-draft_len_4-float16-bs2]": 220.46912331692874,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[streaming-gpt2-use_cpp_session-use_logits-draft_len_4-float16-bs2]": 222.54111004807055,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:2-disable_fp8]": 203.55354792065918,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 189.6864925120026,
"examples/test_exaone.py::test_llm_exaone_1gpu[disable_weight_only-exaone_3.0_7.8b_instruct-float16-nb:1]": 473.8068177103996,
"examples/test_exaone.py::test_llm_exaone_1gpu[disable_weight_only-exaone_3.0_7.8b_instruct-float16-nb:4]": 205.28752172738314,
"examples/test_multimodal.py::test_llm_multimodal_general[deplot-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 179.15185776166618,
"examples/test_prompt_lookup.py::test_llm_prompt_lookup_1gpu[no_streaming-gpt2-use_cpp_session-use_tokens-max_matching_ngram_size_2-prompt_lookup_num_tokens_8-float16-bs1]": 233.80333462916315,
"examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen2_0.5b_instruct-enable_paged_kv_cache-enable_remove_input_padding-enable_weight_only-enable_fmha]": 123.65003899484873,
"test_cpp.py::test_unit_tests[80]": 1176.5702936146408,
"test_unittests.py::test_unittests_v2[unittest/_torch -k \"modeling_llama\"]": 448.5385442841798,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_qwen\"]": 396.0599227370694,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[no_streaming-gpt2-use_cpp_session-use_logits-draft_len_4-float16-bs1]": 220.48439004272223,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[streaming-gpt2-use_cpp_session-use_logits-draft_len_4-float16-bs1]": 221.33857776224613,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 202.78905210644007,
"examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-summarization]": 766.289718978107,
"examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-summarization_long]": 354.7249199002981,
"examples/test_multimodal.py::test_llm_multimodal_general[llava-v1.6-mistral-7b-hf-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 267.43367490172386,
"test_unittests.py::test_unittests_v2[unittest/_torch/auto_deploy/unit/singlegpu]": 715.1035223379731,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_out_of_tree\"]": 94.79181066202,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"partition0\"]": 230.79139936715364,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[no_streaming-gpt2-use_cpp_session-use_tokens-draft_len_4-float16-bs1]": 220.48733178526163,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[streaming-gpt2-use_cpp_session-use_tokens-draft_len_4-float16-bs2]": 238.1519063487649,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-float32-disable_gemm_plugin-disable_attention_plugin-disable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 205.46519743651152,
"examples/test_granite.py::test_llm_granite[granite-3.0-1b-a400m-instruct-bfloat16]": 368.9064020588994,
"examples/test_internlm.py::test_llm_internlm2_7b_1node_1gpu[bfloat16-enable_context_fmha-enable_gemm_plugin-enable_attention_plugin-nb:2]": 294.4753697216511,
"examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-chunked_summarization_long]": 443.5032472461462,
"examples/test_prompt_lookup.py::test_llm_prompt_lookup_1gpu[streaming-gpt2-use_cpp_session-use_tokens-max_matching_ngram_size_2-prompt_lookup_num_tokens_8-float16-bs2]": 191.30353821069002,
"examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen2.5_1.5b_instruct-enable_paged_kv_cache-enable_remove_input_padding-enable_weight_only-enable_fmha]": 162.48481699824333,
"test_cpp.py::test_benchmarks[gpt-80]": 1252.3170381858945,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_nemotron\"]": 330.17481607571244,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"partition3\"]": 921.5470168888569,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_nemotron_nas.py -k \"not fp8\"]": 289.01844235509634,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[no_streaming-gpt2-use_cpp_session-use_tokens-draft_len_4-float16-bs2]": 223.6047435477376,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-bfloat16-enable_gemm_plugin-enable_attention_plugin-disable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 204.12767488509417,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-float32-disable_gemm_plugin-enable_attention_plugin-disable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 194.45501364022493,
"examples/test_granite.py::test_llm_granite[granite-3.0-2b-instruct-bfloat16]": 362.70301412791014,
"examples/test_llama.py::test_llm_llama_v1_manage_weights_1gpu_summarize[llama-7b]": 273.7324623838067,
"examples/test_llama.py::test_llm_llama_v2_1gpu_auto_parallel[llama-v2-7b-hf]": 527.7862892448902,
"examples/test_multimodal.py::test_llm_multimodal_general[Qwen2-VL-7B-Instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:4]": 355.9665117710829,
"examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen2_7b_instruct-enable_paged_kv_cache-enable_remove_input_padding-enable_weight_only-enable_fmha]": 334.18761303275824,
"examples/test_qwen2audio.py::test_llm_qwen2audio_single_gpu[qwen2_audio_7b_instruct]": 373.8418433815241,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_nemotron_nas\"]": 587.2612264081836,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"partition2\"]": 1086.5458996072412,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[streaming-gpt2-use_cpp_session-use_tokens-draft_len_4-float16-bs1]": 216.4487509690225,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-byt5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 213.7413339074701,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:2-disable_fp8]": 196.13724466226995,
"examples/test_gpt.py::test_llm_minitron[4b-bfloat16-full_prec]": 174.4602623153478,
"examples/test_llama.py::test_llm_llama_v2_1gpu_weight_streaming[llama-v2-7b-hf-1.0-plugin]": 1229.6853012256324,
"examples/test_medusa.py::test_llm_medusa_1gpu[use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs1]": 209.74914916604757,
"examples/test_prompt_lookup.py::test_llm_prompt_lookup_1gpu[no_streaming-gpt2-use_cpp_session-use_tokens-max_matching_ngram_size_2-prompt_lookup_num_tokens_8-float16-bs2]": 176.01735756732523,
"examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen_7b_chat-enable_paged_kv_cache-enable_remove_input_padding-enable_weight_only-enable_fmha]": 346.44035330042243,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_mixtral\"]": 207.87299499381334,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"other\"]": 119.17958049662411,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"partition1\"]": 263.92830369621515,
"examples/test_gpt.py::test_llm_gpt2_medium_1gpu[streaming-use_cpp_session-enable_gemm_plugin]": 107.15047403424978,
"examples/test_gpt.py::test_llm_gpt2_medium_stop_words_1gpu[non_streaming-use_cpp_session]": 180.58306914567947,
"examples/test_gpt.py::test_llm_gpt2_medium_stop_words_1gpu[streaming-use_cpp_session]": 181.08415200561285,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-int8_sq-float16-enable_attn_plugin-enable_gemm_plugin]": 556.3153209090233,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-no_paged_cache-disable_quant-float16-disable_attn_plugin-enable_gemm_plugin]": 246.07540269941092,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-no_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]": 184.53705009818077,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-use_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]": 185.134624697268,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-disable_gemm_plugin-disable_attention_plugin-disable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 199.5346100255847,
"examples/test_gpt.py::test_llm_gpt2_medium_1gpu[non_streaming-use_cpp_session-enable_gemm_plugin]": 105.90890420973301,
"examples/test_gpt.py::test_llm_gpt2_medium_bad_words_1gpu[non_streaming-use_cpp_session]": 179.99799578636885,
"examples/test_gpt.py::test_llm_gpt2_medium_bad_words_1gpu[non_streaming-use_py_session]": 180.62669630348682,
"examples/test_gpt.py::test_llm_gpt2_medium_bad_words_1gpu[streaming-use_cpp_session]": 179.8604817390442,
"examples/test_gpt.py::test_llm_gpt2_medium_stop_words_1gpu[non_streaming-use_py_session]": 180.72864849120378,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]": 211.51401184499264,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-int4_awq-float16-enable_attn_plugin-enable_gemm_plugin]": 486.34902361780405,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-flax-no_paged_cache-disable_quant-float16-enable_attn_plugin-disable_gemm_plugin]": 287.17211075872183,
"examples/test_llama.py::test_llm_llama_1gpu[llama-v3-8b-instruct-hf-enable_fp8-float16-summarization-nb:1]": 222.08693411946297,
"examples/test_llama.py::test_llm_llama_1gpu_fp4[llama-v3-8b-instruct-hf-disable_norm_quant_fusion-disable_fused_quant-fp4_ootb-float16]": 543.2480030350853,
"examples/test_llama.py::test_llm_llama_1gpu_fp4[llama-v3-8b-instruct-hf-disable_norm_quant_fusion-enable_fused_quant-fp4_plugin-float16]": 378.6253670700826,
"examples/test_llama.py::test_llm_llama_1gpu_fp4[llama-v3-8b-instruct-hf-enable_norm_quant_fusion-enable_fused_quant-fp4_plugin-float16]": 343.2268390309764,
"examples/test_mixtral.py::test_llm_mixtral_1gpu_fp4_llmapi[Mixtral-8x7B-Instruct-v0.1]": 274.7116751889698,
"examples/test_pytorch.py::test_llm_llama_1gpu[llama-3.1-8b-enable_fp4]": 397.44854424195364,
"test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-8B]": 47.77508906694129,
"test_unittests.py::test_unittests_v2[unittest/_torch/multi_gpu_modeling -k \"deepseek and tp1 and nextn0\"]": 1501.4569957539788,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"trtllm_gen\"]": 492.1111381390365,
"examples/test_llama.py::test_llm_llama_1gpu[llama-v3-8b-instruct-hf-disable_fp8-float16-summarization-nb:1]": 327.472873901017,
"examples/test_llama.py::test_llm_llama_1gpu_fp4[llama-v3-8b-instruct-hf-disable_norm_quant_fusion-disable_fused_quant-fp4_plugin-float16]": 479.9771699520061,
"examples/test_llama.py::test_llm_llama_1gpu_fp4[llama-v3-8b-instruct-hf-enable_norm_quant_fusion-disable_fused_quant-fp4_plugin-float16]": 356.6772044340032,
"examples/test_llama.py::test_llm_llama_1gpu_fp4_model_config[llama-v3-8b-instruct-hf-fp4_plugin]": 53.952199321996886,
"examples/test_pytorch.py::test_llm_deepseek_1gpu[deepseek-v3-lite-disable_fp8-enable_fp4]": 324.4154664159869,
"test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]": 75.52178371301852,
"test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-False-False]": 112.16112289205194,
"test_unittests.py::test_unittests_v2[unittest/trt/functional/test_fp4_gemm.py]": 239.35822420899058,
"deterministic/test_mixtral_deterministic.py::test_llm_mixtral_4gpus_deterministic[Mixtral-8x7B-Instruct-v0.1-float16]": 514.8680490620318,
"examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen1.5_7b_chat-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4]": 308.7925306240213,
"test_cpp.py::test_multi_gpu[90]": 2131.593547832046,
"test_e2e.py::test_llmapi_example_distributed_tp2": 69.02365298604127,
"test_unittests.py::test_unittests_v2[unittest/_torch/multi_gpu]": 1517.2648563559633,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_multi_gpu.py -m \"gpu2 and part0\"]": 273.0379349630093,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_multi_gpu.py -m \"not (gpu2 or gpu4)\"]": 745.5187598060002,
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8[DeepSeek-V3-Lite-fp8]": 220.82791428617202,
"disaggregated/test_disaggregated.py::test_disaggregated_multi_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]": 61.23355127801187,
"examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2b-fp8-bfloat16-8]": 276.1408422719687,
"examples/test_llama.py::test_llm_llama_2gpu_fp8_summary[llama-7b-enable_reduce_fusion-disable_fp8_context_fmha_xqa]": 242.57844000402838,
"examples/test_llama.py::test_llm_llama_4gpu_pp4[TinyLlama-1.1B-Chat-v1.0-float16-nb:1]": 113.20934204105288,
"examples/test_llama.py::test_llm_llama_v2_4gpu_tp2cp2[llama-v2-7b-hf-float16-nb:1]": 185.70166881894693,
"examples/test_mixtral.py::test_llm_mixtral_moe_plugin_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora]": 579.7677474520169,
"examples/test_nemotron_nas.py::test_nemotron_nas_summary_2gpu[DeciLM-7B]": 229.0144616710022,
"test_unittests.py::test_unittests_v2[unittest/_torch/auto_deploy/unit/multigpu]": 385.15193357504904,
"test_unittests.py::test_unittests_v2[unittest/trt/functional/test_allreduce_norm.py]": 33.17591990181245,
"test_unittests.py::test_unittests_v2[unittest/llmapi/apps/_test_openai_multi_gpu.py -m \"part0\"]": 397.4450480469968,
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one[DeepSeek-V3-Lite-fp8]": 198.35416324809194,
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp[DeepSeek-V3-Lite-fp8]": 198.35416324809194,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:2-pp:2-nb:1-enable_fp8]": 333.3260381403379,
"examples/test_llama.py::test_llm_llama_2gpu_fp8_summary[llama-7b-disable_reduce_fusion-enable_fp8_context_fmha_xqa]": 398.4714640309103,
"examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[enable_gemm_allreduce_plugin-llama-3.1-8b-enable_fp8]": 563.2787040728144,
"examples/test_llama.py::test_llm_llama_v3_8b_1048k_long_context_ppl[passkey-Llama-3-8B-Instruct-Gradient-1048k]": 445.73215844482183,
"test_e2e.py::test_llmapi_exit_multi_gpu": 26.371326874941587,
"test_unittests.py::test_unittests_v2[unittest/_torch/multi_gpu_modeling -k \"llama and not (tp1 and pp1)\"]": 159.40950463991612,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_multi_gpu.py -m \"gpu4 and part0\"]": 140.50599069194868,
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp[DeepSeek-V3-Lite-fp8]": 234.40521239489317,
"examples/test_llama.py::test_llm_llama_2gpu_fp8_summary[llama-7b-disable_reduce_fusion-disable_fp8_context_fmha_xqa]": 362.4640129506588,
"examples/test_llama.py::test_llm_llama_2gpu_fp8_summary[llama-7b-enable_reduce_fusion-enable_fp8_context_fmha_xqa]": 125.09478603303432,
"examples/test_llama.py::test_llm_llama_v2_fp8_2gpu_cp2[llama-v2-7b-hf-float16-nb:1]": 207.04526684433222,
"examples/test_llama.py::test_llm_llama_v2_fp8_2gpu_pp2[llama-v2-7b-hf-bfloat16-nb:1]": 153.0728387348354,
"examples/test_llama.py::test_llm_llama_v2_lora_benchmark_2gpu[chinese_lora-llama-v2-13b-hf]": 714.0935160629451,
"examples/test_mixtral.py::test_llm_mixtral_fp8_managed_weights_4gpus_summary[Mixtral-8x7B-v0.1]": 990.6669274643064,
"examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:2-bfloat16-bs:1-cpp_e2e:False-nb:1]": 384.2949004136026,
"test_unittests.py::test_unittests_v2[unittest/_torch/multi_gpu_modeling -k \"deepseek and not (tp1 and pp1) and nextn0\"]": 1567.030888363719,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_multi_gpu.py -m \"gpu2 and part1\"]": 503.9076916947961,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_multi_gpu.py -m \"gpu2 and part2\"]": 250.59611881896853,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_multi_gpu.py -m \"gpu2 and part3\"]": 249.24695850908756,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_models_multi_gpu.py]": 576.8418473489583,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:2-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion]": 125.20705968420953,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]": 110.46942141931504,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-RobertaForQuestionAnswering-bert/roberta-base-squad2]": 105.9574088291265,
"examples/test_dit.py::test_llm_dit_multiple_gpus[dit-xl-2-256x256-tp1]": 225.35873269103467,
"examples/test_llama.py::test_llm_llama_v2_awq_2gpu_summary[Llama-2-7B-AWQ-nb:1]": 283.3380792280659,
"examples/test_llama.py::test_llm_llama_v2_awq_2gpu_summary[Llama-2-7B-GPTQ-nb:4]": 171.74356483994052,
"examples/test_llama.py::test_llm_llama_v2_awq_2gpu_summary[llama-v2-7b-hf-nb:1]": 380.5473432317376,
"examples/test_llama.py::test_llm_llama_v2_int8sq_2gpu_tp2[llama-v2-7b-hf-bfloat16-nb:1]": 181.12843894818798,
"examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[disable_gemm_allreduce_plugin-llama-3.1-8b-disable_fp8]": 551.3369894148782,
"examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[disable_gemm_allreduce_plugin-llama-3.1-8b-enable_fp8]": 519.526911018882,
"examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[enable_gemm_allreduce_plugin-llama-3.1-8b-disable_fp8]": 568.2065577358007,
"examples/test_mamba.py::test_llm_mamba2_2gpu[mamba-codestral-7B-v0.1]": 238.0765609210357,
"examples/test_phi.py::test_llm_phi_1node_2gpus_summary[phi-2-nb:4]": 148.22429683618248,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_2gpu[recurrentgemma-2b]": 215.24848986929283,
"test_e2e.py::test_llmapi_example_distributed_autopp_tp2": 110.57169062783942,
"test_e2e.py::test_llmapi_quant_llama_70b": 1359.890202102717,
"test_unittests.py::test_unittests_v2[unittest/_torch/auto_deploy/integration/test_ad_build.py]": 804.2961544720456,
"examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1]": 238.009345151484,
"examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_py_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1]": 229.22342134127393,
"test_e2e.py::test_llmapi_example_medusa_decoding_use_modelopt": 274.5927692130208,
"test_accuracy.py::test_accuracy_long_alpaca[long-alpaca-7b-multiblock-aggressive]": 179.84497827105224,
"test_accuracy.py::test_accuracy_phi[phi-2-phi-context-fmha-enabled]": 130.27221551164985,
"test_accuracy.py::test_accuracy_phi[phi-2-phi-mmha-multi-block-mode]": 114.19020783156157,
"test_cpp.py::test_unit_tests[90]": 1378.5633971448988,
"test_e2e.py::test_model_api_examples": 314.3335437886417,
"test_e2e.py::test_trtllm_bench_request_rate_and_concurrency[enable_concurrency-enable_request_rate]": 266.77701852843165,
"test_unittests.py::test_unittests_v2[unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py]": 221.25409611687064,
"test_unittests.py::test_unittests_v2[unittest/trt/quantization/test_weight_only_quant_matmul.py]": 102.99527521245182,
"examples/test_enc_dec.py::test_llm_enc_dec_mmlu[flan-t5-small-float32-tp:1-pp:1-nb:1-disable_fp8]": 436.80323184095323,
"examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-2-9b-it-other-bfloat16-8]": 424.7785783447325,
"examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2b-int8_sq-bfloat16-8]": 198.51798786222935,
"examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1]": 342.04199170693755,
"examples/test_pytorch.py::test_llm_llama_1gpu[llama-3.1-8b-disable_fp4]": 585.6307429242879,
"examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-disable_weight_only]": 355.566904416075,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8]": 246.2873280600179,
"examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-2b-it-flax-fp8_kv_cache-bfloat16-8]": 115.74243776104413,
"examples/test_llama.py::test_llm_llama_v2_1gpu_fp8_gemv[llama-v2-7b-hf]": 225.53843238204718,
"examples/test_llama.py::test_llm_llama_v3_1_1node_single_gpu[llama-3.2-1b-disable_fp8]": 100.95579409133643,
"examples/test_qwen.py::test_llm_hf_qwen_multi_lora_1gpu[qwen2.5_1.5b_instruct]": 140.88227126700804,
"test_accuracy.py::test_accuracy_gptj[gptj-fp8-kv-cache]": 255.14463341981173,
"test_cpp.py::test_model[fp8-llama-90]": 778.0537680378184,
"test_e2e.py::test_trtllm_bench_help_sanity[meta-llama/Llama-3.1-8B]": 85.56777227623388,
"examples/test_gpt.py::test_llm_gpt2_medium_fp8[False]": 221.49930329900235,
"examples/test_llama.py::test_llm_llama_1gpu[llama-v2-7b-hf-enable_fp8-float16-summarization-nb:2]": 191.30041963700205,
"examples/test_llama.py::test_llm_llama_v2_1gpu_low_latency_gemm[llama-v2-7b-hf-fp8]": 90.6559073231183,
"examples/test_pytorch.py::test_llm_deepseek_1gpu[deepseek-v3-lite-enable_fp8-disable_fp4]": 620.9891363149509,
"test_cpp.py::test_benchmarks[bart-90]": 238.44337234133855,
"test_cpp.py::test_model[enc_dec_language_adapter-90]": 217.1860674340278,
"test_cpp.py::test_model[fp8-gptj-90]": 939.3910294710658,
"test_e2e.py::test_benchmark_sanity_enable_fp8[gptj_6b]": 150.5581414680928,
"examples/test_llama.py::test_llama_3_x_fp8_with_bf16_lora[llama-3.2-1b]": 136.27177622704767,
"examples/test_llama.py::test_llm_llama_v2_1gpu_gemm_swiglu[llama-v2-7b-hf-fp8-float16]": 442.70912846399006,
"examples/test_multimodal.py::test_llm_fp8_multimodal_general[fp8-fp8-scienceqa-Llama-3.2-11B-Vision-Instruct-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False]": 405.8749147169874,
"test_cpp.py::test_model[bart-90]": 218.34018443501554,
"test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-instruct-hf-fp8-True-True]": 142.93065852398286,
"test_unittests.py::test_unittests_v2[unittest/trt/functional/test_moe.py]": 234.94054578302894,
"examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-2b-keras-fp8_kv_cache-bfloat16-8]": 388.2805628060596,
"examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2b-int4_awq-bfloat16-8]": 342.3824067909736,
"examples/test_llama.py::test_llama_3_x_fp8_with_bf16_lora[llama-3.1-8b]": 236.03740064997692,
"examples/test_llama.py::test_llm_llama_1gpu[llama-3.1-8b-instruct-hf-fp8-enable_fp8-float16-summarization-nb:1]": 223.46429320902098,
"examples/test_multimodal.py::test_llm_fp8_multimodal_general[fp8-fp8-cnn_dailymail-Qwen2-VL-7B-Instruct-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False]": 699.8648770479485,
"examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1]": 406.29480336094275,
"test_accuracy.py::test_accuracy_large_beam_width_search[large-beam-width-search]": 386.6833416179288,
"test_e2e.py::test_benchmark_sanity_enable_fp8[llama_7b]": 172.197191352956,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8]": 444.78757989406586,
"examples/test_gpt.py::test_llm_gpt2_medium_fp8[True]": 290.976373674348,
"examples/test_llama.py::test_llm_llama_v3_1_1node_single_gpu[llama-3.1-8b-disable_fp8]": 198.46642523631454,
"test_cpp.py::test_benchmarks[t5-90]": 265.8048793170601,
"test_cpp.py::test_model[t5-90]": 218.41966092959046,
"test_e2e.py::test_benchmark_sanity_enable_fp8[gpt_350m]": 165.21792945079505,
"examples/test_enc_dec.py::test_llm_enc_dec_mmlu[flan-t5-small-float32-tp:1-pp:1-nb:1-enable_fp8]": 549.0880617420189,
"examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-7b-it-flax-wo_int4-bfloat16-8]": 314.90914471261203,
"examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-7b-it-flax-wo_int8-float16-8]": 259.74879706604406,
"examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2b-int4_awq-float16-8]": 595.6029811110348,
"examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-7b-fp8-float16-8]": 389.76426160987467,
"examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-7b-int4_awq-float16-8]": 881.7010877672583,
"examples/test_granite.py::test_granite_bf16_lora[granite-3.0-2b-instruct]": 220.11962961405516,
"examples/test_llama.py::test_llm_llama_v2_1gpu_fp8_summary_and_mmlu[llama-v2-7b-hf-enable_fp8_fmha-enable_mmlu_test]": 671.4116345108487,
"examples/test_llama.py::test_llm_llama_v2_1gpu_fp8_summary_and_mmlu[llama-v2-7b-hf-enable_fp8_paged_fmha-disable_mmlu_test]": 110.50635690474883,
"examples/test_llama.py::test_llm_llama_v3_1_autoq_1gpu_mmlu[llama-3.1-8b]": 840.0003815609962,
"test_accuracy.py::test_accuracy_phi[phi-2-phi-context-fmha-disabled]": 114.22529838001356,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gptneox.py]": 227.26687153801322,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-float16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8]": 1662.0536208283156,
"examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-7b-keras-fp8_kv_cache-float16-8]": 310.8335256492719,
"examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-7b-keras-wo_int8-float16-8]": 308.7603419777006,
"examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-7b-torch-other-bfloat16-8]": 313.05099336616695,
"examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2b-fp8-float16-8]": 252.7731210407801,
"examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2b-int8_sq-float16-8]": 228.12679186835885,
"examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-7b-fp8-bfloat16-8]": 426.5599648784846,
"examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-7b-int4_awq-bfloat16-8]": 1012.6150853093714,
"examples/test_granite.py::test_granite_bf16_lora[granite-3.0-1b-a400m-instruct]": 165.13228186033666,
"examples/test_llama.py::test_llama_3_x_fp8_with_bf16_lora[llama-v3-8b-instruct-hf]": 195.20884297974408,
"examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:1-cpp_e2e:True-nb:1]": 346.2961323596537,
"examples/test_multimodal.py::test_llm_multimodal_general[neva-22b-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1]": 619.2907309047878,
"test_accuracy.py::test_accuracy_gptj[gptj-context-fmha-disabled]": 228.66582870762795,
"test_accuracy.py::test_accuracy_gptj[gptj-float32]": 213.06748974882066,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_bloom.py]": 136.28332487586886,
"examples/test_llama.py::test_llm_llama_v3_1_1node_single_gpu[llama-3.1-8b-enable_fp8]": 889.8317703623325,
"examples/test_phi.py::test_llm_phi_single_gpu_summary[Phi-3-mini-128k-instruct-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1]": 216.54395807394758,
"examples/test_qwen.py::test_llm_qwen1_5_7b_single_gpu_lora[qwen1.5_7b_chat-Qwen1.5-7B-Chat-750Mb-lora]": 272.396039951127,
"test_e2e.py::test_ptp_quickstart_bert[BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]": 102.97587639791891,
"examples/test_llama.py::test_llm_llama_v1_1gpu_streaming_llm[llama-7b-enable_gemm_plugin-nb:4]": 796.9408070724458,
"examples/test_llama.py::test_llm_llama_v3_dora_1gpu[commonsense-llama-v3-8b-dora-r32-llama-v3-8b-hf-base_fp16]": 555.0123887173831,
"test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-video]": 587.7560013290495,
"examples/test_llama.py::test_llm_llama_v3_1_1node_single_gpu[llama-3.1-8b-enable_fp8_meta_recipe]": 602.5631069587544,
"examples/test_phi.py::test_llm_phi_lora_1gpu[Phi-3-mini-4k-instruct-ru-lora-Phi-3-mini-4k-instruct-lora_fp16-base_fp16]": 230.49683402199298,
"examples/test_phi.py::test_llm_phi_single_gpu_summary[Phi-3-small-128k-instruct-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1]": 333.6462291791104,
"test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-image]": 594.4285814953037,
"examples/test_llama.py::test_llm_llama_v1_1gpu_kv_cache_reuse_with_prompt_table[llama-7b]": 211.37737978063524,
"examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-llama-v2-13b-hf-lora_fp16-base_fp16]": 656.444500297308,
"examples/test_nemotron_nas.py::test_nemotron_nas_summary_1gpu[DeciLM-7B]": 856.6468322910368,
"examples/test_phi.py::test_llm_phi_single_gpu_summary[Phi-3-mini-4k-instruct-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1]": 232.31379313208163,
"examples/test_phi.py::test_llm_phi_single_gpu_summary[Phi-3-small-8k-instruct-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1]": 372.76599719561636,
"examples/test_phi.py::test_llm_phi_single_gpu_summary[Phi-3.5-mini-instruct-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1]": 236.81048927269876,
"test_e2e.py::test_ptp_quickstart_multimodal[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image]": 201.31396353803575,
"test_accuracy.py::test_accuracy_bloom[bloom-context-fmha-disabled]": 175.44133150950074,
"test_accuracy.py::test_accuracy_bloom[bloom-mmha-multi-block-mode]": 171.94136535190046,
"examples/test_phi.py::test_llm_phi_single_gpu_summary[phi-2-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_fmha_with_fp32_acc-nb:1]": 266.6327917207964,
"test_accuracy.py::test_accuracy_bloom[bloom-context-fmha-enabled]": 167.2855539782904
}