TensorRT-LLMs/tests/integration/defs/.test_durations
Jin Li c04563657e
[TRTLLM-7735][feat] Attention NVFP4 out support for torch compile (#9740)
Signed-off-by: Jin Li <59594262+liji-nv@users.noreply.github.com>
2025-12-27 00:07:20 +08:00

977 lines
117 KiB
Plaintext

{
"accuracy/test_cli_flow.py::TestGemma2_9BIt::test_auto_dtype": 725.8308991710655,
"accuracy/test_cli_flow.py::TestGemma2_9BIt::test_weight_only[int4]": 256.8124763760716,
"accuracy/test_cli_flow.py::TestGemma2_9BIt::test_weight_only[int8]": 234.0067440699786,
"accuracy/test_cli_flow.py::TestGpt2::test_attention_ootb": 448.54090467840433,
"accuracy/test_cli_flow.py::TestGpt2::test_auto_dtype": 376.7764785774052,
"accuracy/test_cli_flow.py::TestGpt2::test_beam_search": 383.99572690576315,
"accuracy/test_cli_flow.py::TestGpt2::test_beam_search_large": 730.1395341157913,
"accuracy/test_cli_flow.py::TestGpt2::test_context_fmha_disabled": 356.8461561538279,
"accuracy/test_cli_flow.py::TestGpt2::test_context_fmha_fp32_acc": 68.77035509195412,
"accuracy/test_cli_flow.py::TestGpt2::test_cuda_graph": 365.0803712736815,
"accuracy/test_cli_flow.py::TestGpt2::test_gemm_plugin": 53.26832581800409,
"accuracy/test_cli_flow.py::TestGpt2::test_int8_kv_cache": 399.65961667895317,
"accuracy/test_cli_flow.py::TestGpt2::test_smooth_quant[per_token=False-per_channel=False]": 73.3218588019372,
"accuracy/test_cli_flow.py::TestGpt2::test_smooth_quant[per_token=True-per_channel=True]": 74.95476273802342,
"accuracy/test_cli_flow.py::TestGpt2::test_variable_beam_width_search": 74.62389065994648,
"accuracy/test_cli_flow.py::TestGpt2::test_weight_only[int4]": 347.21782275289297,
"accuracy/test_cli_flow.py::TestGpt2::test_weight_only[int8]": 358.4815372042358,
"accuracy/test_cli_flow.py::TestGpt2::test_weight_streaming_ootb": 222.17091258615255,
"accuracy/test_cli_flow.py::TestGpt2::test_weight_streaming_plugin": 72.80018049001228,
"accuracy/test_cli_flow.py::TestGpt2Medium::test_auto_dtype": 78.43281774100615,
"accuracy/test_cli_flow.py::TestGpt2Medium::test_fp8": 193.54337832704186,
"accuracy/test_cli_flow.py::TestGpt2Medium::test_fp8_lm_head": 78.71367865701905,
"accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype": 429.80293437838554,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_auto_dtype": 444.6878175288439,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8": 510.68390227202326,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8_2gpus[cp2]": 180.78952708397992,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8_2gpus[pp2]": 166.04589607805246,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8_2gpus[tp2]": 166.75922018906567,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8_gemm_plugin": 593.3573900908232,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8_gemm_swiglu_plugin": 451.02113576978445,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8_low_latency_gemm_plugin": 482.50407074484974,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_awq_prequantized_tp2": 121.77989674796117,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_awq_tp2": 272.11888975003967,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_gptq_prequantized_tp2": 134.79151003999868,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_smooth_quant": 168.3700958029367,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_smooth_quant_ootb_tp2": 173.15021856303792,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_tp2cp2": 138.08522557100514,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_weight_sparsity": 613.5882918275893,
"accuracy/test_cli_flow.py::TestLlama3_1_8B::test_auto_dtype": 170.19967718899716,
"accuracy/test_cli_flow.py::TestLlama3_1_8B::test_autoq": 1058.2184530100785,
"accuracy/test_cli_flow.py::TestLlama3_1_8B::test_fp8": 368.3140486832708,
"accuracy/test_cli_flow.py::TestLlama3_1_8B::test_fp8_rowwise_meta_recipe": 634.7149123200215,
"accuracy/test_cli_flow.py::TestLlama3_1_8B::test_fp8_rowwise_tp4[disable_gemm_allreduce_plugin]": 0.00019256497034803033,
"accuracy/test_cli_flow.py::TestLlama3_1_8B::test_fp8_rowwise_tp4[enable_gemm_allreduce_plugin]": 0.00018584198551252484,
"accuracy/test_cli_flow.py::TestLlama3_1_8B::test_tp4[disable_gemm_allreduce_plugin]": 0.00020851899171248078,
"accuracy/test_cli_flow.py::TestLlama3_1_8B::test_tp4[enable_gemm_allreduce_plugin]": 0.00019980798242613673,
"accuracy/test_cli_flow.py::TestLlama3_1_8BInstruct::test_auto_dtype": 138.02327981899725,
"accuracy/test_cli_flow.py::TestLlama3_1_8BInstruct::test_fp8_prequantized": 171.8214656477794,
"accuracy/test_cli_flow.py::TestLlama3_1_8BInstruct::test_medusa_fp8_prequantized": 651.7356893768301,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_auto_dtype": 167.0847301799804,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_cyclic_kv_cache": 0.00020177400438115,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_cyclic_kv_cache_beam_search": 0.00023707805667072535,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8": 95.26037903001998,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_pp2": 112.49999983899761,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_rowwise": 73.94236607506173,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_int4_awq": 101.8501071939827,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_int4_awq_int8_kv_cache": 392.90223736315966,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_int4_awq_manage_weights": 101.03500067297136,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_smooth_quant": 190.484365709126,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_smooth_quant_ootb": 457.93785213679075,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_smooth_quant_ootb_manage_weights": 216.66169160604477,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_weight_streaming[0.1]": 323.6820353940129,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_weight_streaming[1.0]": 250.1382947349921,
"accuracy/test_cli_flow.py::TestLlama3_3_70BInstruct::test_fp8_prequantized_tp4": 814.2746116059134,
"accuracy/test_cli_flow.py::TestLlama3_3_70BInstruct::test_nvfp4_prequantized_tp4": 0.7675385079928674,
"accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_auto_dtype": 347.127849099983,
"accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_fp8": 261.4332031469967,
"accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_int8_gptq": 578.4320518560708,
"accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4": 286.4440165119886,
"accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4_gemm_plugin[disable_norm_quant_fusion-disable_fused_quant]": 286.4440165119886,
"accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4_gemm_plugin[disable_norm_quant_fusion-enable_fused_quant]": 286.4440165119886,
"accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4_gemm_plugin[enable_norm_quant_fusion-disable_fused_quant]": 286.4440165119886,
"accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4_gemm_plugin[enable_norm_quant_fusion-enable_fused_quant]": 286.4440165119886,
"accuracy/test_cli_flow.py::TestLlama3_8BInstructGradient1048k::test_long_context": 713.1731983916834,
"accuracy/test_cli_flow.py::TestLlama3_8BInstructGradient1048k::test_long_context_ppl": 858.7944585508667,
"accuracy/test_cli_flow.py::TestLlama7B::test_auto_dtype": 402.75543826818466,
"accuracy/test_cli_flow.py::TestLlama7B::test_beam_search": 160.57701692701085,
"accuracy/test_cli_flow.py::TestLlama7B::test_int4_gptq": 141.50458357605385,
"accuracy/test_cli_flow.py::TestLlama7B::test_manage_weights": 536.7081215977669,
"accuracy/test_cli_flow.py::TestLlama7B::test_streamingllm": 601.9727729707956,
"accuracy/test_cli_flow.py::TestLongAlpaca7B::test_auto_dtype": 774.4440165119886,
"accuracy/test_cli_flow.py::TestLongAlpaca7B::test_multiblock_aggressive": 674.175037201494,
"accuracy/test_cli_flow.py::TestMamba130M::test_auto_dtype": 144.6635948382318,
"accuracy/test_cli_flow.py::TestMinitron4BBase::test_auto_dtype": 189.79791952297091,
"accuracy/test_cli_flow.py::TestMinitron4BBase::test_fp8": 378.120541986078,
"accuracy/test_cli_flow.py::TestMixtral8x22B::test_fp8_tp2pp2": 2413.1753760920255,
"accuracy/test_cli_flow.py::TestMixtral8x22B::test_int8_plugin_tp8[renormalize-tensor_parallel]": 2863.4440165119886,
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_fp4_plugin": 863.4440165119886,
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_fp8_tp2pp2": 863.4440165119886,
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_fp8_tp2pp2_manage_weights": 863.4440165119886,
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_nvfp4_prequantized": 471.08943115500733,
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_ootb_except_mha_tp8[expert_parallel]": 924.8701723880367,
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_ootb_except_mha_tp8[mixed_parallel]": 912.7559759569704,
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_ootb_except_mha_tp8[tensor_parallel]": 1121.821529859968,
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_plugin_tp8[no_renormalize-tensor_parallel]": 347.9747967119911,
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_plugin_tp8[renormalize-expert_parallel]": 341.95159026194597,
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_plugin_tp8[renormalize-mixed_parallel]": 344.84741433890304,
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_plugin_tp8[renormalize-tensor_parallel]": 362.50067716895137,
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_pp_reduce_scatter_tp2pp2": 642.7109664399759,
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_weight_only_int4_tp2": 883.2716767450911,
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_weight_only_int8_tp2": 596.1287247948931,
"accuracy/test_cli_flow.py::TestNemotronMini4BInstruct::test_fp8_prequantized": 208.21560259815305,
"accuracy/test_cli_flow.py::TestPhi2::test_auto_dtype": 284.1176424920559,
"accuracy/test_cli_flow.py::TestPhi2::test_tp2": 94.65857742994558,
"accuracy/test_cli_flow.py::TestPhi3Mini128kInstruct::test_auto_dtype": 500.89369447529316,
"accuracy/test_cli_flow.py::TestPhi3Mini4kInstruct::test_auto_dtype": 251.17038829252124,
"accuracy/test_cli_flow.py::TestPhi3Small128kInstruct::test_auto_dtype": 512.450893450994,
"accuracy/test_cli_flow.py::TestPhi3Small8kInstruct::test_auto_dtype": 306.3908146258909,
"accuracy/test_cli_flow.py::TestPhi3_5MiniInstruct::test_auto_dtype": 475.5876609608531,
"accuracy/test_cli_flow.py::TestQwen1_5MoeA2_7BChat::test_auto_dtype": 409.6967312040506,
"accuracy/test_cli_flow.py::TestQwen1_5MoeA2_7BChat::test_weight_only": 409.4440165119886,
"accuracy/test_cli_flow.py::TestQwen2_0_5BInstruct::test_auto_dtype": 249.48429385805503,
"accuracy/test_cli_flow.py::TestQwen2_0_5BInstruct::test_fp8": 139.30493941006716,
"accuracy/test_cli_flow.py::TestQwen2_0_5BInstruct::test_weight_only": 399.7237217463553,
"accuracy/test_cli_flow.py::TestQwen2_1_5B::test_auto_dtype_cp4": 243.65832925395807,
"accuracy/test_cli_flow.py::TestQwen2_57B_A14B::test_tp2pp2": 1000.2955902386456728,
"accuracy/test_cli_flow.py::TestQwen2_57B_A14B::test_tp4": 1000.178949700668454,
"accuracy/test_cli_flow.py::TestQwen2_7BInstruct::test_int4_awq_prequantized": 604.7383968606591,
"accuracy/test_cli_flow.py::TestSantacoder::test_auto_dtype": 621.1919705275446177,
"accuracy/test_cli_flow.py::TestStarcoder2_15B::test_smooth_quant_ootb": 621.3599092587829,
"accuracy/test_cli_flow.py::TestStarcoder2_3B::test_auto_dtype": 221.9660275951028,
"accuracy/test_cli_flow.py::TestTinyLlama1_1BChat::test_float32": 171.85410665394738,
"accuracy/test_cli_flow.py::TestTinyLlama1_1BChat::test_pp4": 93.37271417694865,
"accuracy/test_cli_flow.py::TestTinyLlama1_1BChat::test_weight_only[int4]": 371.7965512983501,
"accuracy/test_cli_flow.py::TestTinyLlama1_1BChat::test_weight_only[int8]": 159.531545445323,
"accuracy/test_cli_flow.py::TestTinyLlama1_1BChat::test_weight_only_int8_kv_cache[int8]": 184.35870655626059,
"accuracy/test_cli_flow.py::TestTinyLlama1_1BChat::test_weight_only_manage_weights[int4]": 161.57166086137295,
"accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=False-chunked_context=False-typical_acceptance=False]": 422.75362031999975,
"accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=True-chunked_context=False-typical_acceptance=False]": 910.3428834918886,
"accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=True-chunked_context=False-typical_acceptance=True]": 820.5789388604462,
"accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=True-chunked_context=True-typical_acceptance=False]": 1072.9654933288693,
"accuracy/test_cli_flow.py::TestVicuna7B::test_lookahead": 947.7913959696889,
"accuracy/test_cli_flow.py::TestVicuna7B::test_medusa[cuda_graph=False]": 854.6058550588787,
"accuracy/test_cli_flow.py::TestVicuna7B::test_medusa[cuda_graph=True]": 553.1062062960118,
"accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=False]": 229.88366167014465,
"accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=True]": 1291.2246230191085,
"accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=2-overlap_scheduler=False]": 226.58150382409804,
"accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=2-overlap_scheduler=True]": 230.04569808510132,
"accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[llguidance-mtp_nextn=0]": 153.0148511910811,
"accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[llguidance-mtp_nextn=2]": 151.06352188810706,
"accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=0]": 157.06875282898545,
"accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=2]": 153.20611042692326,
"accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend": 71.2399792142678,
"accuracy/test_disaggregated_serving.py::TestGemma3_1BInstruct::test_auto_dtype[False]": 286.7775873204227537,
"accuracy/test_disaggregated_serving.py::TestGemma3_1BInstruct::test_auto_dtype[True]": 286.6778334858827293,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False-False-False]": 781.7928658421151,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True-True-True]": 270.3750694899354,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[GSM8K-gen_tp=1-ctx_pp=2]": 195.4896494857967,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[GSM8K-gen_tp=1-ctx_pp=4]": 205.93911361903884,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[GSM8K-gen_tp=2-ctx_pp=2]": 188.56422709790058,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[GSM8K-gen_tp=2-ctx_pp=4]": 199.29050170327537,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=1-ctx_pp=2]": 127.93316596397199,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=1-ctx_pp=4]": 129.7617962991353,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=2-ctx_pp=2]": 127.73340241820551,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=2-ctx_pp=4]": 129.19083517300896,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=False-overlap_scheduler=False]": 329.7597716320306,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=True-overlap_scheduler=True]": 232.4293970640283,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding[llguidance]": 198.69031671597622,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding[xgrammar]": 199.68801344232634,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[llguidance-eagle3_one_model=False]": 161.8193401999306,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[llguidance-eagle3_one_model=True]": 107.20401050220244,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[xgrammar-eagle3_one_model=False]": 163.33450849773362,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[xgrammar-eagle3_one_model=True]": 137.72523731505498,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_multi_instance[GSM8K]": 209.07444706000388,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_multi_instance[MMLU]": 149.8242256443482,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ngram": 286.3013918437063694,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[GSM8K-tp1pp2]": 199.44922504294664,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[GSM8K-tp2pp1]": 173.89489603298716,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[GSM8K-tp2pp2]": 172.84839022206143,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[MMLU-tp1pp2]": 128.10282056825235,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[MMLU-tp2pp1]": 121.90447079204023,
"accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[MMLU-tp2pp2]": 117.0786016730126,
"accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[False]": 64428.639228201006,
"accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[True]": 572.5455802679062,
"accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_auto_dtype[False]": 472.62511800276116,
"accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_auto_dtype[True]": 273.7770717362873,
"accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend": 56.07656032079831,
"accuracy/test_llm_api.py::TestLlama3_1_8B::test_fp8_rowwise": 361.5573864541948,
"accuracy/test_llm_api.py::TestLlama3_1_8BInstruct::test_gather_generation_logits_cuda_graph": 95.2069768682122,
"accuracy/test_llm_api.py::TestLlama3_1_8BInstruct::test_guided_decoding[xgrammar]": 92.18154831900029,
"accuracy/test_llm_api.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[xgrammar]": 92.545280149206519,
"accuracy/test_llm_api.py::TestLlama3_2_1B::test_auto_dtype": 48.46169294399442,
"accuracy/test_llm_api.py::TestLlama3_2_1B::test_fp8_pp2": 91.98502069339156,
"accuracy/test_llm_api.py::TestLlama3_2_1B::test_fp8_rowwise": 46.47006619500462,
"accuracy/test_llm_api.py::TestLlama3_2_1B::test_int4_awq": 86.60050829302054,
"accuracy/test_llm_api.py::TestLlama3_2_1B::test_int4_awq_int8_kv_cache": 92.7290103025734425,
"accuracy/test_llm_api.py::TestLlama3_2_1B::test_smooth_quant": 84.79963076103013,
"accuracy/test_llm_api.py::TestLlama3_2_1B::test_smooth_quant_ootb": 99.57484673697036,
"accuracy/test_llm_api.py::TestMixtral8x7B::test_smooth_quant_tp2pp2": 292.669760379940271,
"accuracy/test_llm_api.py::TestMixtral8x7B::test_tp2": 226.48782553203637,
"accuracy/test_llm_api.py::TestMixtral8x7BInstruct::test_awq_tp2": 2021.8361112800194,
"accuracy/test_llm_api.py::TestQwen2_5_0_5BInstruct::test_fp8": 93.85909735393943,
"accuracy/test_llm_api.py::TestQwen2_5_1_5BInstruct::test_auto_dtype": 173.21916160301771,
"accuracy/test_llm_api.py::TestQwen2_5_1_5BInstruct::test_fp8": 176.52263558004051,
"accuracy/test_llm_api.py::TestQwen2_5_1_5BInstruct::test_weight_only": 137.54228180646896,
"accuracy/test_llm_api.py::TestQwen2_5_7BInstruct::test_fp8": 223.2249169460265,
"accuracy/test_llm_api.py::TestQwen2_5_7BInstruct::test_fp8_kvcache": 293.131719612516463,
"accuracy/test_llm_api.py::TestQwen2_7BInstruct::test_auto_dtype": 146.72803921002196,
"accuracy/test_llm_api.py::TestQwen2_7BInstruct::test_fp8": 149.11297382606426,
"accuracy/test_llm_api.py::TestQwen2_7BInstruct::test_weight_only": 594.9357111975551,
"accuracy/test_llm_api_pytorch.py::TestBielik11BInstruct::test_auto_dtype": 3600.001755183912,
"accuracy/test_llm_api_pytorch.py::TestBielik11BInstruct::test_fp8": 3600.0018868579646,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput]": 7291.4317992888391,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale_chunked_prefill[latency]": 7291.9177003782242537,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale_chunked_prefill[throughput]": 7291.818120779618621,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency]": 7292.3961801091209054,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_adp_lmtp]": 7292.2807201944456277,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen]": 7292.6643901705286214,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen_adp_lmtp]": 7292.5567501965034455,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput]": 7292.690330147743225,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp4]": 7292.468819195404649,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8]": 7292.699600299820304,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus_chunked_prefill[latency]": 7292.5356449983082712,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus_chunked_prefill[throughput_tp4]": 7292.413349622860551,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus_corner_case": 7292.248919965699315,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False-enable_chunked_prefill=False]": 591.2785023800097,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=False]": 107.58471493399702,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False-enable_chunked_prefill=False]": 143.84012729604729,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False-enable_chunked_prefill=False]": 295.3527018489549,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=False]": 306.84709841990843,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False-enable_chunked_prefill=False]": 220.57452515885234,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=False]": 165.08514453098178,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False-enable_chunked_prefill=False]": 202.22269394202158,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False-enable_chunked_prefill=False]": 113.82226522010751,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=False]": 205.7252635700861,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=True]": 213.78996226208983,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=0-moe_backend=WIDEEP]": 292.7267158059985377,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2-moe_backend=WIDEEP]": 292.5756296711042523,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[-attention_dp-cuda_graph-overlap_scheduler-torch_compile=False]": 326.1317654890008,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=disable-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 647.6109309499152,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=disable-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False]": 184.20976317999884,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=disable-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]": 226.01353620411828,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=disable-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 506.1045090719126,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=eagle-attention_dp-cuda_graph-overlap_scheduler-torch_compile=False]": 336.02580665098503,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=eagle-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 413.903915906325,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=eagle-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False]": 143.841789112892,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=eagle-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]": 246.64391099987552,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=eagle-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 202.37037238897756,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus_static_eplb": 3600.0020909640007,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_guided_decoding[llguidance-mtp_nextn=0]": 3600.441698686045129,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_guided_decoding[llguidance-mtp_nextn=2]": 3600.591623628977686,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=0]": 3600.642337311059237,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=2]": 3600.572194146050606,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_guided_decoding_4gpus[llguidance-mtp_nextn=0]": 3600.003432472993,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_guided_decoding_4gpus[llguidance-mtp_nextn=2]": 3600.21731633093441,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_guided_decoding_4gpus[xgrammar-mtp_nextn=0]": 3600.002661294944,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_guided_decoding_4gpus[xgrammar-mtp_nextn=2]": 3600.0022540579666,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 89.92349556891713,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False]": 85.24235329206567,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]": 252.70569713797886,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 90.21807348495349,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]": 175.661773331929,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 360.0003233290044590831,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=False-moe_backend=WIDEEP]": 360.9275938109494746,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True-moe_backend=WIDEEP]": 360.0002855450729839504,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_batch_waiting[batch_wait_timeout_iters=10-batch_wait_max_tokens_ratio=0.75-mtp_nextn=0-fp8kv=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 360.0003064870252273977,
"accuracy/test_llm_api_pytorch.py::TestEXAONE4::test_auto_dtype": 3600.0004039629711769521,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass-auto]": 360.00032637204276397824,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass-fp8]": 360.0003586999955587089,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-triton-auto]": 360.6586053780047223,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-trtllm-auto]": 360.0003633099840953946,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-trtllm-fp8]": 360.00036422599805518985,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-cutlass-auto]": 360.0003378289984539151,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-triton-auto]": 360.9436147869564593,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-trtllm-auto]": 360.0003398499684408307,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-trtllm-fp8]": 360.0002922280109487474,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-cutlass-auto]": 360.0003666180418804288,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-triton-auto]": 360.9300670439261012,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-trtllm-auto]": 360.0002812399761751294,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-trtllm-fp8]": 360.0008064290159381926,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-cutlass-auto]": 360.0003697940264828503,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-triton-auto]": 360.8670774899655953,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-trtllm-auto]": 360.00040231598541140556,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-trtllm-fp8]": 360.0003254589391872287,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass-auto]": 745.8583740849863,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-triton-auto]": 745.9345730679342523,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-trtllm-auto]": 745.0004936959594488144,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-trtllm-fp8]": 745.00031642295653000474,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-cutlass-auto]": 658.1757711600512,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-triton-auto]": 745.9436021829606034,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-trtllm-auto]": 745.0004371170070953667,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-trtllm-fp8]": 745.0004142870311625302,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-cutlass-auto]": 676.3980704760179,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-triton-auto]": 745.0292645250447094,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-trtllm-auto]": 745.0003769229515455663,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-trtllm-fp8]": 677.000331886054482311,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_chunked_prefill[cutlass-auto]": 643.3513998010312,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_chunked_prefill[triton-auto]": 764.9216735750087537,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_chunked_prefill[trtllm-auto]": 764.0002969659981317818,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_chunked_prefill[trtllm-fp8]": 764.0008383550448343158,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4a16[dp4-auto]": 764.8800516680348665,
"accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4a16[dp4-fp8]": 764.00035094103077426553,
"accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype": 88.30407958402066,
"accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_auto_dtype": 892.3242024959764,
"accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_fp8_prequantized": 451.8630696780165,
"accuracy/test_llm_api_pytorch.py::TestKanana_Instruct::test_auto_dtype": 86.96273394301534,
"accuracy/test_llm_api_pytorch.py::TestKimiK2::test_fp8_blockscale[latency]": 0.19557904999237508,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4": 56.31924073398113,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_auto_dtype_beam_search[enable_cuda_graph=False-enable_padding=False-disable_overlap_scheduler=False]": 115.23303954396397,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_auto_dtype_beam_search[enable_cuda_graph=False-enable_padding=False-disable_overlap_scheduler=True]": 115.0261728389305,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_auto_dtype_beam_search[enable_cuda_graph=True-enable_padding=False-disable_overlap_scheduler=False]": 98.29266697796993,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_auto_dtype_beam_search[enable_cuda_graph=True-enable_padding=False-disable_overlap_scheduler=True]": 96.56704166403506,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_auto_dtype_beam_search[enable_cuda_graph=True-enable_padding=True-disable_overlap_scheduler=False]": 98.47511212801328,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_auto_dtype_beam_search[enable_cuda_graph=True-enable_padding=True-disable_overlap_scheduler=True]": 96.76115251897136,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16[attn_backend=FLASHINFER-torch_compile=False]": 307.12596721109,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16[attn_backend=FLASHINFER-torch_compile=True]": 443.91388061689213,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16[attn_backend=TRTLLM-torch_compile=False]": 191.10617867391557,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16[attn_backend=TRTLLM-torch_compile=True]": 166.85348949534819,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_chunked_prefill[attn_backend=FLASHINFER]": 167.15153613401344,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_chunked_prefill[attn_backend=TRTLLM]": 90.12104846700095,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=False-overlap_scheduler=False]": 1112.0988524899585,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=True-overlap_scheduler=True]": 979.2759481471148,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=False-attn_backend=FLASHINFER-torch_compile=False]": 237.24446990108117,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=False-attn_backend=FLASHINFER-torch_compile=True]": 226.39608797896653,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=False-attn_backend=TRTLLM-torch_compile=False]": 174.38962662010454,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=False-attn_backend=TRTLLM-torch_compile=True]": 313.69273760309443,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=True-attn_backend=FLASHINFER-torch_compile=False]": 409.8932851999998,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=True-attn_backend=FLASHINFER-torch_compile=True]": 344.8807112099603,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=True-attn_backend=TRTLLM-torch_compile=False]": 103.82129427790642,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=True-attn_backend=TRTLLM-torch_compile=True]": 164.91815144987777,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_beam_search[enable_cuda_graph=False-enable_padding=False-disable_overlap_scheduler=False]": 124.62386814301135,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_beam_search[enable_cuda_graph=False-enable_padding=False-disable_overlap_scheduler=True]": 121.9821372089209,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_beam_search[enable_cuda_graph=True-enable_padding=False-disable_overlap_scheduler=False]": 77.16783500701422,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_beam_search[enable_cuda_graph=True-enable_padding=False-disable_overlap_scheduler=True]": 75.89485901995795,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_beam_search[enable_cuda_graph=True-enable_padding=True-disable_overlap_scheduler=False]": 77.46995012206025,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_beam_search[enable_cuda_graph=True-enable_padding=True-disable_overlap_scheduler=True]": 75.93042165302904,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_sampler": 61.986203632026445,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[llguidance]": 44.978786278981715,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[xgrammar]": 46.60325947904494,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[llguidance]": 52.44408063602168,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[xgrammar]": 52.84089746000245,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[llguidance-eagle3_one_model=False]": 36.739327706047334,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[llguidance-eagle3_one_model=True]": 67.41741452802671,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[xgrammar-eagle3_one_model=False]": 36.758924948982894,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[xgrammar-eagle3_one_model=True]": 32.73584783205297,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_ngram[llguidance]": 31.598825128981844,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_ngram[xgrammar]": 31.997449714050163,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_ngram": 404.38593446696177,
"accuracy/test_llm_api_pytorch.py::TestLlama3_2_1B::test_auto_dtype": 49.4644276680192,
"accuracy/test_llm_api_pytorch.py::TestLlama3_2_1B::test_fp8_prequantized": 40.83171262202086,
"accuracy/test_llm_api_pytorch.py::TestLlama3_2_3B::test_auto_dtype": 60.00025596999330446124,
"accuracy/test_llm_api_pytorch.py::TestLlama3_2_3B::test_fp8_prequantized": 67.63213626696961,
"accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=False-torch_compile=False]": 3770.7388199700508,
"accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=True-torch_compile=False]": 4010.9139676889754,
"accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4[torch_compile=False]": 2616.262340236979,
"accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4[torch_compile=False]": 3600.00026297004660591483,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp4-cuda_graph=False]": 7200.00030252401484176517,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp4ep2-cuda_graph=True]": 7200.0002791329752653837,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp4ep4-cuda_graph=True]": 7200.00028608099091798067,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8-cuda_graph=False]": 7200.0002498350222595036,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep4-cuda_graph=True]": 7200.00023661594605073333,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep8-cuda_graph=True]": 7200.00023339298786595464,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_chunked_prefill[attn_backend=FLASHINFER]": 7200.0002561529981903732,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_chunked_prefill[attn_backend=TRTLLM]": 7200.0052206520340405405,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp4-cuda_graph=True]": 7200.2161163400160149,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp4ep2-cuda_graph=True]": 7200.2132116109714843,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp4ep4-cuda_graph=True]": 7200.3660773960873485,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp8-cuda_graph=True]": 7200.00025232898769900203,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp8ep4-cuda_graph=True]": 7200.00025386200286448,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp8ep8-cuda_graph=True]": 7200.00023917207727208734,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_chunked_prefill[tp8ep8-cuda_graph=False]": 7200.5301868109382,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_chunked_prefill[tp8ep8-cuda_graph=True]": 72600.1488124400494,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8-torch_compile=False]": 7255.686029373668134,
"accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8-torch_compile=True]": 7255.3157395950402,
"accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp4-cuda_graph=False]": 3600.0010551271309959702,
"accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp4ep2-cuda_graph=True]": 3600.0009890546519891359,
"accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp4ep4-cuda_graph=True]": 3600.000870058874017559,
"accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp8-cuda_graph=False]": 3600.0022709049517,
"accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp8ep4-cuda_graph=True]": 3600.0008703919593,
"accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp8ep8-cuda_graph=True]": 3600.001674739062,
"accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp4[tp4-cuda_graph=True]": 3600.0003222679952159524,
"accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp4[tp8ep8-cuda_graph=True]": 3600.0004189839819446206,
"accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp4_chunked_prefill[tp4ep4-cuda_graph=True]": 3600.0009446179610677063,
"accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp8[tp4-cuda_graph=True]": 3600.0600651470012963,
"accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp8[tp8ep8-cuda_graph=True]": 3600.0020443379763,
"accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp8_chunked_prefill[tp4ep4-cuda_graph=True]": 3600.0016280449927,
"accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_auto_dtype": 300.0017418859643,
"accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8": 300.001715709921,
"accuracy/test_llm_api_pytorch.py::TestMinitron4BBaseInstruct::test_fp8_prequantized": 48.064747432945296,
"accuracy/test_llm_api_pytorch.py::TestMistral7B::test_auto_dtype": 96.73990149900783,
"accuracy/test_llm_api_pytorch.py::TestMistralNemo12B::test_auto_dtype": 3600.0012007239857,
"accuracy/test_llm_api_pytorch.py::TestMistralNemo12B::test_auto_dtype_tp2": 3600.001961252012,
"accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype": 3600.0003251209855079651,
"accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_fp8": 196.57955891895108,
"accuracy/test_llm_api_pytorch.py::TestMixtral8x7B::test_fp8_tp2": 3600.0022730380297,
"accuracy/test_llm_api_pytorch.py::TestMixtral8x7B::test_nvfp4_tp2": 3600.7994798690197058,
"accuracy/test_llm_api_pytorch.py::TestNemotronNas::test_auto_dtype_tp8": 3600.5389363930444,
"accuracy/test_llm_api_pytorch.py::TestPhi4::test_auto_dtype": 3600.0017098310054,
"accuracy/test_llm_api_pytorch.py::TestPhi4::test_fp8": 3600.0018840720295,
"accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype": 3600.000989172084,
"accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope": 3600.001809718029,
"accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype": 3600.0018334789784,
"accuracy/test_llm_api_pytorch.py::TestQwen2_7BInstruct::test_auto_dtype": 49.38216367200948,
"accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_fp8[latency]": 7576.4847942629713,
"accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_fp8[throughput_latency]": 75934.1885519769276,
"accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass]": 7570.00041150598553940654,
"accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm]": 7570.0005082660354673862,
"accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm_attention_dp]": 7570.0003929820377379656,
"accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4_4gpus[latency_moe_trtllm_eagle3]": 7570.000994006055407226,
"accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=False]": 3240.3035402488895,
"accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=True]": 3240.3568000392988324,
"accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=False]": 3240.326488903677091,
"accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=True]": 3240.3251879825256765,
"accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_trtllm-torch_compile=False]": 3240.3220480284653604,
"accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_trtllm-torch_compile=True]": 3240.12236930197104812,
"accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a16_mxfp4[latency-TRTLLM]": 3240.3637070371955633,
"accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[fp8-latency-CUTLASS]": 3240.5122077039559372,
"accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[fp8-latency-TRITON]": 3240.46138638898264617,
"accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[fp8-latency-TRTLLM]": 3240.4374056719825603,
"accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[mxfp8-latency-CUTLASS]": 3240.4651110970880836,
"accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[mxfp8-latency-TRTLLM]": 3240.4263977239606902,
"accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_fp8_block_scales[latency-torch_compile=False]": 149.19146074401215,
"accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_fp8_block_scales[latency]": 104.32479889906244,
"accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_w4a8_mxfp4[fp8-latency]": 240.30756398336961865,
"accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_w4a8_mxfp4[mxfp8-latency]": 240.27633493300527334,
"cpp/test_e2e.py::test_benchmarks[bart-90]": 271.95234084688127,
"cpp/test_e2e.py::test_benchmarks[gpt-80]": 1376.0404928650241,
"cpp/test_e2e.py::test_benchmarks[t5-90]": 523.07,
"cpp/test_e2e.py::test_model[-bart-90]": 391.84748707409017,
"cpp/test_e2e.py::test_model[-eagle-86]": 850.5158995762467,
"cpp/test_e2e.py::test_model[-enc_dec_language_adapter-90]": 416.06,
"cpp/test_e2e.py::test_model[-gpt-80]": 1568.98,
"cpp/test_e2e.py::test_model[-gpt_executor-80]": 1495.14,
"cpp/test_e2e.py::test_model[-gpt_tests-80]": 1206.79,
"cpp/test_e2e.py::test_model[-mamba-86]": 893.8684413917363,
"cpp/test_e2e.py::test_model[-medusa-86]": 577.0913726426661,
"cpp/test_e2e.py::test_model[-redrafter-86]": 356.56682327389717,
"cpp/test_e2e.py::test_model[-t5-90]": 170.26,
"cpp/test_e2e.py::test_model[fp8-llama-90]": 385.98,
"cpp/test_unit_tests.py::test_unit_tests[batch_manager-80]": 1005.24,
"cpp/test_unit_tests.py::test_unit_tests[common-80]": 38.98,
"cpp/test_unit_tests.py::test_unit_tests[common-90]": 25.06,
"cpp/test_unit_tests.py::test_unit_tests[executor-80]": 425.16,
"cpp/test_unit_tests.py::test_unit_tests[kernels-80]": 2009.96,
"cpp/test_unit_tests.py::test_unit_tests[kernels-90]": 1333.18,
"cpp/test_unit_tests.py::test_unit_tests[layers-80]": 2209.11,
"cpp/test_unit_tests.py::test_unit_tests[layers-90]": 1627.07,
"cpp/test_unit_tests.py::test_unit_tests[runtime-80]": 1671.42,
"cpp/test_unit_tests.py::test_unit_tests[thop-80]": 6.76,
"cpp/test_unit_tests.py::test_unit_tests[thop-90]": 4.16,
"cpp/test_unit_tests.py::test_unit_tests[utils-80]": 8.53,
"cpp/test_unit_tests.py::test_unit_tests[utils-90]": 5.28,
"disaggregated/test_disaggregated.py::test_disaggregated_cache_aware_balance[TinyLlama-1.1B-Chat-v1.0]": 54.26733888499439,
"disaggregated/test_disaggregated.py::test_disaggregated_ctxpp4_genpp4[TinyLlama-1.1B-Chat-v1.0]": 64.01942083099857,
"disaggregated/test_disaggregated.py::test_disaggregated_ctxtp2pp2_gentp2pp2[TinyLlama-1.1B-Chat-v1.0]": 67.32925470802002,
"disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]": 50.57874152995646,
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp[DeepSeek-V3-Lite-fp8]": 105.12023228011094,
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one[DeepSeek-V3-Lite-fp8]": 98.13158084987663,
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp[DeepSeek-V3-Lite-fp8]": 104.78005758393556,
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_mpi[DeepSeek-V3-Lite-fp8]": 680.20395052596,
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_nixl[DeepSeek-V3-Lite-fp8]": 102.03138376423158,
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu[DeepSeek-V3-Lite-fp8]": 90.40784636512399,
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp[DeepSeek-V3-Lite-fp8]": 124.17078560194932,
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_ucx[DeepSeek-V3-Lite-fp8]": 102.67233599093743,
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu[DeepSeek-V3-Lite-fp8]": 224.28071974776685,
"disaggregated/test_disaggregated.py::test_disaggregated_kv_cache_time_output[TinyLlama-1.1B-Chat-v1.0]": 52.78952780482359,
"disaggregated/test_disaggregated.py::test_disaggregated_load_balance[TinyLlama-1.1B-Chat-v1.0]": 73.48997121001594,
"disaggregated/test_disaggregated.py::test_disaggregated_mixed[TinyLlama-1.1B-Chat-v1.0]": 67.3897166326642,
"disaggregated/test_disaggregated.py::test_disaggregated_multi_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]": 54.22262764698826,
"disaggregated/test_disaggregated.py::test_disaggregated_overlap[TinyLlama-1.1B-Chat-v1.0]": 98.97588296607137,
"disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]": 67.9668476767838,
"disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun_trt_backend[TinyLlama-1.1B-Chat-v1.0]": 82.28277984517626,
"disaggregated/test_disaggregated.py::test_disaggregated_trtllm_sampler[TinyLlama-1.1B-Chat-v1.0]": 62.51559329708107,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_llama_context_capacity[False-False-DeepSeek-V3-Lite-fp8/fp8]": 238.76137515995651,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[False-False-DeepSeek-V3-Lite-fp8/fp8]": 78.98068026197143,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[False-True-DeepSeek-V3-Lite-fp8/fp8]": 77.51831256924197,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-False-DeepSeek-V3-Lite-fp8/fp8]": 99.81417108187452,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-True-DeepSeek-V3-Lite-fp8/fp8]": 67.32832619687542,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[False-False-TinyLlama-1.1B-Chat-v1.0]": 48.16434509307146,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[False-True-TinyLlama-1.1B-Chat-v1.0]": 36.88020430901088,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[True-False-TinyLlama-1.1B-Chat-v1.0]": 46.302398771978915,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[True-True-TinyLlama-1.1B-Chat-v1.0]": 38.81214914191514,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_qwen3[False-False-Qwen3-8B-FP8]": 31.12580855889246,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_qwen3[False-True-Qwen3-8B-FP8]": 29.8858498937916,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_qwen3[True-False-Qwen3-8B-FP8]": 55.79476427496411,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_qwen3[True-True-Qwen3-8B-FP8]": 55.29072010796517,
"disaggregated/test_workers.py::test_workers_conditional_disaggregation[TinyLlama-1.1B-Chat-v1.0]": 99.67464494984597,
"disaggregated/test_workers.py::test_workers_kv_cache_aware_router[TinyLlama-1.1B-Chat-v1.0]": 113.69421282503754,
"disaggregated/test_workers.py::test_workers_kv_cache_aware_router_eviction[TinyLlama-1.1B-Chat-v1.0]": 195.86076739802957,
"disaggregated/test_workers.py::test_workers_kv_cache_events[TinyLlama-1.1B-Chat-v1.0]": 76.3156570668798,
"examples/test_bert.py::test_llm_bert_general[compare_hf-disable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:1-pp:1-float32-BertModel-bert/bert-base-uncased]": 111.17977902293205,
"examples/test_bert.py::test_llm_bert_general[compare_hf-disable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:1-pp:1-float32-RobertaModel-bert/roberta-base]": 115.47540166974068,
"examples/test_bert.py::test_llm_bert_general[compare_hf-disable_remove_input_padding-use_attention_plugin-disable_context_fmha-tp:2-pp:1-float16-BertForQuestionAnswering-bert/bert-base-cased-squad2]": 64.32757595699513,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-disable_context_fmha-tp:1-pp:1-float16-BertModel-bert/bert-base-uncased]": 55.807815481035504,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-disable_context_fmha-tp:2-pp:1-float16-BertModel-bert/bert-base-uncased]": 60.69481396203628,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-BertForQuestionAnswering-bert/bert-base-cased-squad2]": 73.41519981494639,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]": 73.00019321503350511193,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-BertModel-bert/bert-base-uncased]": 121.36917966976762,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-RobertaForQuestionAnswering-bert/roberta-base-squad2]": 74.99331583303865,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion]": 73.000189481012057513,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-RobertaModel-bert/roberta-base]": 122.99858937039971,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha_fp32_acc-tp:1-pp:1-float16-BertModel-bert/bert-base-uncased]": 55.26069303997792,
"examples/test_bindings.py::test_llm_bindings_example[llama-7b]": 407.3440967289498,
"examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-chat-disable_weight_only]": 3640.0045928549953,
"examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-chat-enable_weight_only]": 3235.5094731519348,
"examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-disable_weight_only]": 3273.7859199331142,
"examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-enable_weight_only]": 3602.5807401749771,
"examples/test_commandr.py::test_llm_commandr_plus_4gpus_summary[disable_weight_only]": 4923.762180174061,
"examples/test_commandr.py::test_llm_commandr_plus_4gpus_summary[enable_weight_only]": 1621.8304493280011,
"examples/test_commandr.py::test_llm_commandr_v01_single_gpu_summary[disable_weight_only]": 1856.5704392530024,
"examples/test_commandr.py::test_llm_commandr_v01_single_gpu_summary[enable_weight_only]": 654.6126579149859,
"examples/test_draft_target_model.py::test_llm_draft_target_llama_1gpu": 819.6695717818802,
"examples/test_draft_target_model.py::test_llm_draft_target_llama_fp8_2gpu": 3212.2392697520554,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[no_streaming-gpt2-use_cpp_session-use_logits-draft_len_4-float16-bs2]": 244.9744301661849,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[no_streaming-gpt2-use_cpp_session-use_logits-draft_len_8-float16-bs1]": 398.78198734589387,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[no_streaming-gpt2-use_cpp_session-use_tokens-draft_len_4-float16-bs2]": 241.73137632384896,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[no_streaming-llama_v2-use_cpp_session-use_tokens-draft_len_4-float16-bs2]": 1099.3959164519329,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[streaming-gpt2-use_cpp_session-use_logits-draft_len_4-float16-bs2]": 249.52418848499656,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[streaming-gpt2-use_cpp_session-use_tokens-draft_len_4-float16-bs2]": 257.3995385244489,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[streaming-gpt2-use_cpp_session-use_tokens-draft_len_8-float16-bs1]": 397.20915103994776,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[streaming-llama_v2-use_cpp_session-use_logits-draft_len_4-float16-bs2]": 582.6556157508167,
"examples/test_eagle.py::test_codellama_eagle_1gpu[CodeLlama-7b-Instruct-eagle1]": 350.00021487101912498474,
"examples/test_eagle.py::test_codellama_eagle_1gpu[CodeLlama-7b-Instruct-eagle2]": 360.0001300480216741562,
"examples/test_eagle.py::test_llama_eagle_1gpu[llama-3.1-8b-eagle1]": 350.00018324801931157708,
"examples/test_eagle.py::test_llama_eagle_1gpu[llama-3.1-8b-eagle2]": 351.8955009509809315,
"examples/test_eagle.py::test_llama_eagle_1gpu[llama-3.2-1b-eagle1]": 350.00017943303100764751,
"examples/test_eagle.py::test_llama_eagle_1gpu[llama-3.2-1b-eagle2]": 350.00016221700934693217,
"examples/test_eagle.py::test_llama_eagle_1gpu[llama-v2-7b-hf-eagle1]": 340.0002391840098425746,
"examples/test_eagle.py::test_llama_eagle_1gpu[llama-v2-7b-hf-eagle2]": 360.000152449996676296,
"examples/test_eagle.py::test_llm_eagle_1gpu[EAGLE-Vicuna-7B-v1.3-float16-bs1-eagle1]": 254.24225717037916,
"examples/test_eagle.py::test_llm_eagle_1gpu[EAGLE-Vicuna-7B-v1.3-float16-bs1-eagle2]": 246.98607586231083,
"examples/test_eagle.py::test_llm_eagle_1gpu_modelopt_ckpt[llama3.1-eagle-8b-hf_v0.5-float16-bs8]": 489.4540088879876,
"examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3-mini-128k-instruct-eagle1]": 480.0001504488755017519,
"examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3-mini-128k-instruct-eagle2]": 480.00015106401406228542,
"examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3-small-128k-instruct-eagle1]": 360.00015012198127806187,
"examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3-small-128k-instruct-eagle2]": 360.00016741501167416573,
"examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3.5-mini-instruct-eagle1]": 360.0001481780782341957,
"examples/test_eagle.py::test_phi_eagle_1gpu[Phi-3.5-mini-instruct-eagle2]": 350.374636543914676,
"examples/test_eagle.py::test_phi_eagle_1gpu[phi-2-eagle1]": 340.00016570300795137882,
"examples/test_eagle.py::test_phi_eagle_1gpu[phi-2-eagle2]": 360.0003829820198006928,
"examples/test_eagle.py::test_qwen_eagle_1gpu[qwen1.5_7b_chat-eagle1]": 350.0001727980561554432,
"examples/test_eagle.py::test_qwen_eagle_1gpu[qwen1.5_7b_chat-eagle2]": 300.00014008115977048874,
"examples/test_eagle.py::test_qwen_eagle_1gpu[qwen2.5_1.5b_instruct-eagle1]": 360.00027757894713431597,
"examples/test_eagle.py::test_qwen_eagle_1gpu[qwen2.5_1.5b_instruct-eagle2]": 360.5976813130546361,
"examples/test_eagle.py::test_qwen_eagle_1gpu[qwen2_0.5b_instruct-eagle1]": 350.000569747993722558,
"examples/test_eagle.py::test_qwen_eagle_1gpu[qwen2_0.5b_instruct-eagle2]": 300.0001450190320611,
"examples/test_eagle.py::test_qwen_eagle_1gpu[qwen2_7b_instruct-eagle1]": 350.000147037033457309,
"examples/test_eagle.py::test_qwen_eagle_1gpu[qwen2_7b_instruct-eagle2]": 300.00014351692516356707,
"examples/test_eagle.py::test_qwen_eagle_1gpu[qwen_7b_chat-eagle1]": 350.0002464139834046364,
"examples/test_eagle.py::test_qwen_eagle_1gpu[qwen_7b_chat-eagle2]": 300.00014754594303667545,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-bfloat16-enable_gemm_plugin-enable_attention_plugin-disable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 232.2934926636517,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 281.1201816312969,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-float16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8]": 2954.5586752621457,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:2-disable_fp8]": 276.10329104214907,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-byt5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 233.94542215764523,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-byt5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8]": 283.8865255280398,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-float32-disable_gemm_plugin-disable_attention_plugin-disable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 220.32321695238352,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-float32-disable_gemm_plugin-enable_attention_plugin-disable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 218.02495155483484,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 96.25796012202045,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8]": 568.2032693652436,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:2-pp:2-nb:1-enable_fp8]": 354.8090338760521,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-mbart-large-50-many-to-one-mmt-float16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 160.50506488600513,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-mbart-large-50-many-to-one-mmt-float16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:2-pp:2-nb:1-enable_fp8]": 300.20793505245819688,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-disable_gemm_plugin-disable_attention_plugin-disable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 218.7171499580145,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 217.96836187317967,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8]": 498.1236152825877,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:2-disable_fp8]": 205.83720442652702,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:2-pp:1-nb:1-enable_fp8]": 295.6078515049885,
"examples/test_enc_dec.py::test_llm_enc_dec_general[no_compare_hf-byt5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8]": 252.0442810030654,
"examples/test_enc_dec.py::test_llm_enc_dec_general[no_compare_hf-byt5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:2-pp:1-nb:1-disable_fp8]": 95.48429084802046,
"examples/test_enc_dec.py::test_llm_enc_dec_mmlu[flan-t5-small-float32-tp:1-pp:1-nb:1-disable_fp8]": 422.4394793640822,
"examples/test_enc_dec.py::test_llm_enc_dec_mmlu[flan-t5-small-float32-tp:1-pp:1-nb:1-enable_fp8]": 1074.875556848012,
"examples/test_exaone.py::test_llm_exaone_1gpu[disable_weight_only-exaone_3.0_7.8b_instruct-float16-nb:1]": 3002.19654186069965,
"examples/test_exaone.py::test_llm_exaone_1gpu[disable_weight_only-exaone_3.0_7.8b_instruct-float16-nb:4]": 3074.5337073504925,
"examples/test_exaone.py::test_llm_exaone_1gpu[disable_weight_only-exaone_deep_2.4b-float16-nb:4]": 243.4259528592229,
"examples/test_exaone.py::test_llm_exaone_1gpu[enable_weight_only-exaone_deep_2.4b-float16-nb:1]": 212.50885355699575,
"examples/test_exaone.py::test_llm_exaone_2gpu[exaone_3.0_7.8b_instruct-float16-nb:1]": 7155.35844087804435,
"examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-2-27b-it]": 317.7816583644599,
"examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-2-9b-it]": 317.7816583644599,
"examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-2-27b-it-other-bfloat16-8]": 7020.1766637390829,
"examples/test_gemma.py::test_llm_gemma_1gpu_summary_vswa[gemma-3-1b-it-other-bfloat16-8]": 195.3050664511975,
"examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2-27b-it-fp8-bfloat16-8]": 317.7816583644599,
"examples/test_gpt.py::test_llm_gpt2_medium_1gpu[non_streaming-use_cpp_session-enable_gemm_plugin]": 114.20040711760521,
"examples/test_gpt.py::test_llm_gpt2_medium_1gpu[non_streaming-use_py_session-disable_gemm_plugin]": 105.38906556204893,
"examples/test_gpt.py::test_llm_gpt2_medium_1gpu[streaming-use_cpp_session-enable_gemm_plugin]": 113.51056583970785,
"examples/test_gpt.py::test_llm_gpt2_medium_1node_4gpus[tp1pp4]": 163.68014803691767,
"examples/test_gpt.py::test_llm_gpt2_medium_1node_4gpus[tp2pp2]": 78.42757173604332,
"examples/test_gpt.py::test_llm_gpt2_medium_1node_4gpus[tp4pp1]": 81.0192210940877,
"examples/test_gpt.py::test_llm_gpt2_medium_bad_words_1gpu[non_streaming-use_cpp_session]": 194.89961875230074,
"examples/test_gpt.py::test_llm_gpt2_medium_bad_words_1gpu[non_streaming-use_py_session]": 200.52475621178746,
"examples/test_gpt.py::test_llm_gpt2_medium_bad_words_1gpu[streaming-use_cpp_session]": 195.00627667084336,
"examples/test_gpt.py::test_llm_gpt2_medium_stop_words_1gpu[non_streaming-use_cpp_session]": 194.90547297894955,
"examples/test_gpt.py::test_llm_gpt2_medium_stop_words_1gpu[non_streaming-use_py_session]": 194.89357279613614,
"examples/test_gpt.py::test_llm_gpt2_medium_stop_words_1gpu[streaming-use_cpp_session]": 194.72326660901308,
"examples/test_gpt.py::test_llm_gpt2_multi_lora_1gpu[900_stories]": 289.75796797091607,
"examples/test_gpt.py::test_llm_gpt2_next_prompt_tuning[use_cpp_session-tp1]": 460.1370678450912,
"examples/test_gpt.py::test_llm_gpt2_next_prompt_tuning[use_py_session-tp1]": 403.39630596572533,
"examples/test_gpt.py::test_llm_gpt2_parallel_embedding_2gpu[float16-0]": 75.63374845997896,
"examples/test_gpt.py::test_llm_gpt2_parallel_embedding_2gpu[float16-1]": 94.17931449599564,
"examples/test_gpt.py::test_llm_minitron_fp8_with_pseudo_loras[4b]": 259.4826051471755,
"examples/test_gpt.py::test_streaming_beam[batch_size_1-disable_return_all_generated_tokens-num_beams_1]": 171.47726750199217,
"examples/test_gpt.py::test_streaming_beam[batch_size_1-disable_return_all_generated_tokens-num_beams_4]": 172.01177623402327,
"examples/test_gpt.py::test_streaming_beam[batch_size_1-return_all_generated_tokens-num_beams_1]": 171.598967110971,
"examples/test_gpt.py::test_streaming_beam[batch_size_1-return_all_generated_tokens-num_beams_4]": 172.0947668950539,
"examples/test_gpt.py::test_streaming_beam[batch_size_3-disable_return_all_generated_tokens-num_beams_1]": 171.43781499005854,
"examples/test_gpt.py::test_streaming_beam[batch_size_3-disable_return_all_generated_tokens-num_beams_4]": 172.76054893503897,
"examples/test_gpt.py::test_streaming_beam[batch_size_3-return_all_generated_tokens-num_beams_1]": 172.11027200194076,
"examples/test_gpt.py::test_streaming_beam[batch_size_3-return_all_generated_tokens-num_beams_4]": 172.64392233698163,
"examples/test_granite.py::test_granite_bf16_lora[granite-3.0-1b-a400m-instruct]": 140.08338637300767,
"examples/test_granite.py::test_granite_bf16_lora[granite-3.0-2b-instruct]": 146.46410073013976,
"examples/test_granite.py::test_llm_granite[granite-3.0-1b-a400m-instruct-bfloat16]": 141.05149138718843,
"examples/test_granite.py::test_llm_granite[granite-3.0-2b-instruct-bfloat16]": 1055.801738537848,
"examples/test_internlm.py::test_llm_internlm2_7b_1node_1gpu[bfloat16-enable_context_fmha-enable_gemm_plugin-enable_attention_plugin-nb:2]": 2690.7412294782698,
"examples/test_llama.py::test_codellama_fp8_with_bf16_lora[CodeLlama-7b-Instruct]": 108.1456982519594,
"examples/test_llama.py::test_llama_3_x_fp8_with_bf16_lora[llama-3.1-8b]": 160.33107751235366,
"examples/test_llama.py::test_llama_3_x_fp8_with_bf16_lora[llama-3.2-1b]": 117.10959041584283,
"examples/test_llama.py::test_llama_3_x_fp8_with_bf16_lora[llama-3.2-3b]": 215.69512976403348,
"examples/test_llama.py::test_llama_3_x_fp8_with_bf16_lora[llama-v2-7b-hf]": 376.96383721905295,
"examples/test_llama.py::test_llama_3_x_fp8_with_bf16_lora[llama-v3-8b-instruct-hf]": 317.46144750900567,
"examples/test_llama.py::test_llm_api_lookahead_decoding_1gpu[Llama-3.1-8B-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct]": 571.7052132850513,
"examples/test_llama.py::test_llm_llama_1gpu[llama-3.1-8b-instruct-hf-fp8-enable_fp8-float16-summarization-nb:1]": 853.2910006027669,
"examples/test_llama.py::test_llm_llama_1gpu_batched_beam_search[llama-7b]": 182.20104870200157,
"examples/test_llama.py::test_llm_llama_1gpu_fp4[llama-3.1-70b-instruct-enable_norm_quant_fusion-enable_fused_quant-fp4_plugin-bfloat16]": 3170.7816583644599,
"examples/test_llama.py::test_llm_llama_1gpu_fp8_kv_cache[llama-v2-7b-hf-bfloat16]": 313.6555140609853,
"examples/test_llama.py::test_llm_llama_1gpu_streaming_llm[ailab-deepseek-coder-6.7b-instruct]": 317.7816583644599,
"examples/test_llama.py::test_llm_llama_2gpu_fp4[llama-3.1-70b-instruct-fp4_plugin]": 3170.7816583644599,
"examples/test_llama.py::test_llm_llama_2gpu_fp8_summary[llama-7b-enable_reduce_fusion-disable_fp8_context_fmha_xqa]": 495.9900846389355,
"examples/test_llama.py::test_llm_llama_2gpu_fp8_summary[llama-v2-13b-hf-disable_reduce_fusion-disable_fp8_context_fmha_xqa]": 768.7279437370016,
"examples/test_llama.py::test_llm_llama_2gpu_fp8_summary[llama-v2-13b-hf-enable_reduce_fusion-enable_fp8_context_fmha_xqa]": 111.7231666649459,
"examples/test_llama.py::test_llm_llama_code_llama_1gpu_summary[CodeLlama-7b-Instruct-enable_context_fmha-enable_gemm_plugin-enable_attention_plugin-nb:4]": 433.10567805200117,
"examples/test_llama.py::test_llm_llama_code_llama_1gpu_summary[CodeLlama-7b-Instruct-enable_with_fp32_acc-enable_gemm_plugin-enable_attention_plugin-nb:1]": 125.61783445900073,
"examples/test_llama.py::test_llm_llama_code_llama_multi_gpus_summary[CodeLlama-34b-Instruct-tp4pp1-nb:4]": 2684.5994712190004,
"examples/test_llama.py::test_llm_llama_code_llama_multi_gpus_summary[CodeLlama-70b-hf-tp2pp2-nb:1]": 7263.337530243967,
"examples/test_llama.py::test_llm_llama_code_llama_quantization_4gpus_summary[CodeLlama-34b-Instruct-tp2pp2-int4_awq-nb:4]": 1884.0861918169539,
"examples/test_llama.py::test_llm_llama_code_llama_quantization_4gpus_summary[CodeLlama-34b-Instruct-tp4pp1-fp8-nb:1]": 1302.0223898240365,
"examples/test_llama.py::test_llm_llama_code_llama_quantization_4gpus_summary[CodeLlama-70b-hf-tp2pp2-int4_awq-nb:1]": 2354.3266233340255,
"examples/test_llama.py::test_llm_llama_code_llama_quantization_4gpus_summary[CodeLlama-70b-hf-tp4pp1-fp8-nb:4]": 2580.1218281050096,
"examples/test_llama.py::test_llm_llama_long_alpaca_8gpu_summary[pg64317-tp8pp1-nb:1]": 441.2820012559532,
"examples/test_llama.py::test_llm_llama_lookahead_single_gpu_summary[llama-3.1-8b]": 109.91487449005945,
"examples/test_llama.py::test_llm_llama_lookahead_xqa_fp8_1gpu[llama-3.1-8b]": 121.70169674104545,
"examples/test_llama.py::test_llm_llama_lookahead_xqa_fp8_1gpu[llama-3.2-1b]": 79.68221819098108,
"examples/test_llama.py::test_llm_llama_v1_1gpu_kv_cache_reuse_with_prompt_table[llama-7b]": 167.92376559507102,
"examples/test_llama.py::test_llm_llama_v1_2gpu_summary[llama-7b-nb:4]": 317.7816583644599,
"examples/test_llama.py::test_llm_llama_v1_4gpu_paged_kv_cache[llama-3.1-8b]": 122.89023815206019,
"examples/test_llama.py::test_llm_llama_v1_multiple_lora_1gpu[luotuo_japan-llama-7b-lora_fp16-base_fp16]": 119.51703953905962,
"examples/test_llama.py::test_llm_llama_v1_multiple_lora_1gpu[luotuo_japan-llama-7b-lora_fp16-base_fp8]": 176.65850483701797,
"examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-llama-v2-13b-hf-lora_fp16-base_awq]": 420.1779588930076,
"examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-llama-v2-13b-hf-lora_fp16-base_fp16]": 895.7611340929288,
"examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-llama-v2-13b-hf-lora_fp16-base_fp8]": 314.3205590210273,
"examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-llama-v2-13b-hf-lora_fp16-base_int8_wo]": 329.1954380639363,
"examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-llama-v2-13b-hf-lora_fp16-base_sq_ootb]": 216.2645359209855,
"examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[disable_gemm_allreduce_plugin-llama-3.1-70b-enable_fp8]": 1654.751242957951,
"examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[enable_gemm_allreduce_plugin-llama-3.1-405b-enable_fp8]": 20655.04908744397,
"examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[enable_gemm_allreduce_plugin-llama-3.1-405b-fp8-disable_fp8]": 13962.460933016031,
"examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[enable_gemm_allreduce_plugin-llama-3.1-70b-disable_fp8]": 4074.200704019924,
"examples/test_llama.py::test_llm_llama_v3_1_1node_single_gpu[llama-3.2-1b-disable_fp8]": 382.12588274572045,
"examples/test_llama.py::test_llm_llama_v3_1m_long_context_8gpus[Llama-3-8B-Instruct-Gradient-1048k]": 3732.5362696319353,
"examples/test_llama.py::test_llm_llama_v3_dora_1gpu[commonsense-llama-v3-8b-dora-r32-llama-v3-8b-hf-base_fp16]": 517.2770831151865,
"examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]": 64.8964971601963,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-1.4b-float16-enable_gemm_plugin]": 181.03255189501215,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-130m-float16-disable_gemm_plugin]": 136.8141469657421,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-130m-float16-enable_gemm_plugin]": 112.04011878371239,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-2.8b-float16-disable_gemm_plugin]": 336.9864642599714,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-370m-float16-enable_gemm_plugin]": 87.74965502799023,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-790m-float16-disable_gemm_plugin]": 148.56857219303492,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-disable_gemm_plugin]": 405.1586506664753,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-enable_gemm_plugin]": 384.9690850973129,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-1.3b-float16-enable_gemm_plugin]": 126.15442710905336,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-disable_gemm_plugin]": 129.8332964628935,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-enable_gemm_plugin]": 118.37521690130234,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-2.7b-float16-disable_gemm_plugin]": 212.40601160994265,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-370m-float16-enable_gemm_plugin]": 75.9140890170238,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-780m-float16-disable_gemm_plugin]": 114.81139776791679,
"examples/test_medusa.py::test_codellama_medusa_1gpu[CodeLlama-7b-Instruct]": 1200.00028181099332869053,
"examples/test_medusa.py::test_llm_medusa_1gpu[use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs1]": 217.8724013082683,
"examples/test_medusa.py::test_llm_medusa_1gpu[use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs8]": 460.24718615040183,
"examples/test_medusa.py::test_llm_medusa_1gpu[use_py_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs1]": 129.02341048512608,
"examples/test_medusa.py::test_llm_medusa_1gpu[use_py_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs8]": 524.8282293006778,
"examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1]": 383.3182801879011,
"examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_py_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1]": 293.03625723719597,
"examples/test_medusa.py::test_phi_medusa_1gpu[Phi-3-mini-128k-instruct]": 600.16531022801063955,
"examples/test_medusa.py::test_phi_medusa_1gpu[Phi-3-small-128k-instruct]": 600.056159240077249706,
"examples/test_medusa.py::test_phi_medusa_1gpu[Phi-3.5-mini-instruct]": 600.1656914030900225,
"examples/test_medusa.py::test_phi_medusa_1gpu[Phi-4-mini-instruct]": 600.05614239699207246,
"examples/test_medusa.py::test_phi_medusa_1gpu[phi-2]": 360.34046900307294,
"examples/test_medusa.py::test_qwen_medusa_1gpu[qwen1.5_7b_chat]": 360.056059536058455706,
"examples/test_medusa.py::test_qwen_medusa_1gpu[qwen2.5_1.5b_instruct]": 300.05598179390653968,
"examples/test_medusa.py::test_qwen_medusa_1gpu[qwen2_0.5b_instruct]": 360.0559237829875201,
"examples/test_medusa.py::test_qwen_medusa_1gpu[qwen2_7b_instruct]": 360.05622997402679175,
"examples/test_medusa.py::test_qwen_medusa_1gpu[qwen_7b_chat]": 360.4968392179580405,
"examples/test_mistral.py::test_llm_mistral_nemo_minitron_fp8_quantization[Mistral-NeMo-Minitron-8B-Instruct]": 464.4887059601024,
"examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-chunked_summarization_long]": 429.02448211982846,
"examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-summarization]": 603.6547773182392,
"examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-summarization_long]": 391.7267559207976,
"examples/test_mixtral.py::test_llm_mixtral_int4_awq_1gpu_summary[mixtral-8x7b-v0.1-AWQ]": 911.8308052410139,
"examples/test_mixtral.py::test_llm_mixtral_moe_plugin_fp8_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora]": 3600.000286887981928885,
"examples/test_mixtral.py::test_llm_mixtral_moe_plugin_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora]": 3600.00015190104022622108,
"examples/test_multimodal.py::test_llm_fp8_multimodal_general[fp8-fp8-cnn_dailymail-Qwen2-VL-7B-Instruct-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False]": 332.0248579243198,
"examples/test_multimodal.py::test_llm_fp8_multimodal_general[fp8-fp8-scienceqa-Llama-3.2-11B-Vision-Instruct-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False]": 317.7816583644599,
"examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1]": 411.7690062429756,
"examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1]": 385.0684349639341,
"examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:2-bfloat16-bs:1-cpp_e2e:False-nb:1]": 317.7816583644599,
"examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 282.8564471802674,
"examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1]": 90.19939221709501,
"examples/test_multimodal.py::test_llm_multimodal_general[Phi-3.5-vision-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 275.5947739640251,
"examples/test_multimodal.py::test_llm_multimodal_general[Phi-4-multimodal-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 644.3520091949031,
"examples/test_multimodal.py::test_llm_multimodal_general[Qwen2-VL-7B-Instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:4]": 656.4784073680639,
"examples/test_multimodal.py::test_llm_multimodal_general[deplot-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 249.87254932150245,
"examples/test_multimodal.py::test_llm_multimodal_general[deplot-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1]": 100.08572303387336,
"examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 317.7816583644599,
"examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float16-bs:1-cpp_e2e:True-nb:1]": 492.22362083010375,
"examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1]": 317.7816583644599,
"examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float16-bs:8-cpp_e2e:True-nb:1]": 317.7816583644599,
"examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 317.7816583644599,
"examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:1-cpp_e2e:True-nb:1]": 333.81485258904286,
"examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1]": 317.7816583644599,
"examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:8-cpp_e2e:True-nb:1]": 317.7816583644599,
"examples/test_multimodal.py::test_llm_multimodal_general[llava-1.5-7b-hf-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 459.2980541479774,
"examples/test_multimodal.py::test_llm_multimodal_general[llava-1.5-7b-hf-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1]": 176.8547668098472,
"examples/test_multimodal.py::test_llm_multimodal_general[llava-1.5-7b-hf-pp:1-tp:1-float16-bs:8-cpp_e2e:True-nb:1]": 317.7816583644599,
"examples/test_multimodal.py::test_llm_multimodal_general[llava-onevision-qwen2-7b-ov-hf-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 533.2010767317843,
"examples/test_multimodal.py::test_llm_multimodal_general[llava-onevision-qwen2-7b-ov-hf-video-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 200.52463799191173,
"examples/test_multimodal.py::test_llm_multimodal_general[llava-v1.6-mistral-7b-hf-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 274.8723033480346,
"examples/test_multimodal.py::test_llm_multimodal_general[llava-v1.6-mistral-7b-hf-vision-trtllm-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 306.38610201328993,
"examples/test_multimodal.py::test_llm_multimodal_general[nougat-base-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1]": 226.57772368215956,
"examples/test_multimodal.py::test_llm_multimodal_general[nougat-base-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1]": 198.46779074892402,
"examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1]": 260.0002240890171378851,
"examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1]": 260.00014175008982419968,
"examples/test_nemotron_nas.py::test_nemotron_nas_summary_1gpu[DeciLM-7B]": 335.41048416192643,
"examples/test_ngram.py::test_llm_ngram_1gpu[no_streaming-gpt2-use_cpp_session-use_tokens-max_matching_ngram_size_2-max_draft_len_8-float16-bs1]": 200.00026965001598000526,
"examples/test_ngram.py::test_llm_ngram_1gpu[no_streaming-gpt2-use_cpp_session-use_tokens-max_matching_ngram_size_2-max_draft_len_8-float16-bs2]": 196.1214354224503,
"examples/test_ngram.py::test_llm_ngram_1gpu[streaming-gpt2-use_cpp_session-use_tokens-max_matching_ngram_size_2-max_draft_len_8-float16-bs1]": 200.0001331439707428217,
"examples/test_ngram.py::test_llm_ngram_1gpu[streaming-gpt2-use_cpp_session-use_tokens-max_matching_ngram_size_2-max_draft_len_8-float16-bs2]": 195.90045699477196,
"examples/test_phi.py::test_llm_phi_1node_2gpus_summary[Phi-3.5-MoE-instruct-nb:1]": 500.5315603710478,
"examples/test_phi.py::test_llm_phi_lora_1gpu[Phi-3-mini-4k-instruct-ru-lora-Phi-3-mini-4k-instruct-lora_fp16-base_fp16]": 217.61977925198153,
"examples/test_phi.py::test_llm_phi_lora_1gpu[Phi-3-mini-4k-instruct-ru-lora-Phi-3-mini-4k-instruct-lora_fp16-base_fp8]": 220.0002483620774000883,
"examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-3-mini-128k-instruct-fp8-float16]": 360.0002299160696566105,
"examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-3.5-MoE-instruct-fp8-bfloat16]": 360.0001645770389586687,
"examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-3.5-mini-instruct-fp8-float16]": 360.0001653869403526187,
"examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-4-mini-instruct-fp8-bfloat16]": 360.00032463017851114273,
"examples/test_phi.py::test_llm_phi_quantization_1gpu[phi-2-fp8-bfloat16]": 154.69453522900585,
"examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-3-mini-128k-instruct]": 360.0003009570064023137,
"examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-3-small-128k-instruct]": 360.00017751706764101982,
"examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-3.5-MoE-instruct]": 1659.5979937280063,
"examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-3.5-mini-instruct]": 360.00015944312326610088,
"examples/test_phi.py::test_phi_fp8_with_bf16_lora[phi-2]": 255.12105553690344,
"examples/test_qwen.py::test_llm_hf_qwen_multi_lora_1gpu[qwen2.5_0.5b_instruct]": 99.85199454193935,
"examples/test_qwen.py::test_llm_hf_qwen_multi_lora_1gpu[qwen2.5_1.5b_instruct]": 193.38715927954763,
"examples/test_qwen.py::test_llm_hf_qwen_multi_lora_1gpu[qwen2_0.5b_instruct]": 128.65416727401316,
"examples/test_qwen.py::test_llm_hf_qwen_quantization_1gpu[qwen2_vl_7b_instruct-fp8-bfloat16]": 360.00023509911261498928,
"examples/test_qwen.py::test_llm_qwen1_5_7b_single_gpu_lora[qwen1.5_7b_chat-Qwen1.5-7B-Chat-750Mb-lora]": 338.59182655182667,
"examples/test_qwen.py::test_llm_qwen1_5_moe_plugin_single_gpu_lora[qwen1.5_moe_a2.7b_chat-Upcycled-Qwen1.5-MoE2.7B-LoRA]": 360.00020574592053890228,
"examples/test_qwen.py::test_llm_qwen1_5_moe_single_gpu_lora[qwen1.5_moe_a2.7b_chat-Upcycled-Qwen1.5-MoE2.7B-LoRA]": 360.00013853202108293772,
"examples/test_qwen.py::test_llm_qwen_1node_8gpus_summary[qwen1.5_72b_chat-tp4pp2-context_fmha]": 3395.8409487981116,
"examples/test_qwen.py::test_llm_qwen_1node_8gpus_summary[qwen2.5_72b_chat-tp4pp2-context_fmha]": 483.20481619704515,
"examples/test_qwen.py::test_llm_qwen_1node_8gpus_summary[qwen2.5_72b_chat-tp8pp1-context_fmha_fp32_acc]": 143.27283577690832,
"examples/test_qwen.py::test_llm_qwen_1node_8gpus_summary[qwen2_72b_instruct-tp8pp1-context_fmha_fp32_acc]": 3382.2705945359776,
"examples/test_qwen.py::test_llm_qwen_7b_int8_kv_1node_1gpus[qwen1.5_7b_chat-enable_gemm_plugin-enable_weight_only]": 238.04839180607814,
"examples/test_qwen.py::test_llm_qwen_7b_int8_kv_1node_1gpus[qwen2.5_7b_chat-enable_gemm_plugin-enable_weight_only]": 232.16420529806055,
"examples/test_qwen.py::test_llm_qwen_7b_int8_kv_1node_1gpus[qwen2_7b_instruct-enable_gemm_plugin-enable_weight_only]": 533.6866797241382,
"examples/test_qwen.py::test_llm_qwen_7b_int8_kv_1node_1gpus[qwen2_vl_7b_instruct-enable_gemm_plugin-enable_weight_only]": 550.00023583497386425734,
"examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen1.5_7b_chat-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4]": 123.84369308606256,
"examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen2.5_7b_chat-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4]": 126.46052589698229,
"examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen2_7b_instruct-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4]": 125.21641759388149,
"examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen2_vl_7b_instruct-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4]": 485.33532909094356,
"examples/test_qwen.py::test_llm_qwen_awq_single_gpu_summary[qwen1.5_7b_chat-nb:4]": 492.117992953863,
"examples/test_qwen.py::test_llm_qwen_awq_single_gpu_summary[qwen2.5_7b_instruct-nb:4]": 232.93913857196458,
"examples/test_qwen.py::test_llm_qwen_awq_single_gpu_summary[qwen2_7b_instruct-nb:4]": 236.095401247032,
"examples/test_qwen.py::test_llm_qwen_awq_single_gpu_summary[qwen2_vl_7b_instruct-nb:4]": 0.00021052989177405834,
"examples/test_qwen.py::test_llm_qwen_int4_single_gpu_summary[qwen1.5_14b_chat_int4-nb:4]": 500.20522884093225,
"examples/test_qwen.py::test_llm_qwen_int4_single_gpu_summary[qwen1.5_7b_chat_awq-nb:1]": 230.01439016801305,
"examples/test_qwen.py::test_llm_qwen_int4_single_gpu_summary[qwen2.5_14b_instruct_int4-nb:4]": 510.19203821208794,
"examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen1.5_0.5b_chat-enable_paged_kv_cache-enable_remove_input_padding-disable_weight_only-disable_fmha]": 82.50602095096838,
"examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen1.5_0.5b_chat-enable_paged_kv_cache-enable_remove_input_padding-enable_weight_only-disable_fmha]": 57.254435981973074,
"examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen1.5_7b_chat-enable_paged_kv_cache-enable_remove_input_padding-disable_weight_only-disable_fmha]": 111.31314336089417,
"examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen1.5_7b_chat-enable_paged_kv_cache-enable_remove_input_padding-enable_weight_only-enable_fmha_fp32_acc]": 170.95472188084386,
"examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen2.5_0.5b_instruct-enable_paged_kv_cache-enable_remove_input_padding-enable_weight_only-enable_fmha_fp32_acc]": 88.65648145298474,
"examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen2.5_7b_instruct-enable_paged_kv_cache-enable_remove_input_padding-disable_weight_only-enable_fmha_fp32_acc]": 413.8294737141114,
"examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen2.5_7b_instruct-enable_paged_kv_cache-enable_remove_input_padding-enable_weight_only-enable_fmha_fp32_acc]": 173.9835420260206,
"examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen2_vl_7b_instruct-enable_paged_kv_cache-enable_remove_input_padding-disable_weight_only-disable_fmha]": 105.8340686399024,
"examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen2_vl_7b_instruct-enable_paged_kv_cache-enable_remove_input_padding-enable_weight_only-enable_fmha_fp32_acc]": 171.40417280013207,
"examples/test_qwen.py::test_llm_qwen_smooth_quant_single_gpu_summary[qwen1.5_7b_chat-enable_ptpc-nb:4]": 254.3720522020012,
"examples/test_qwen.py::test_llm_qwen_smooth_quant_single_gpu_summary[qwen2.5_7b_instruct-enable_ptpc-nb:4]": 253.72904381889384,
"examples/test_qwen.py::test_llm_qwen_smooth_quant_single_gpu_summary[qwen2_7b_instruct-enable_ptpc-nb:4]": 256.547968689003,
"examples/test_qwen.py::test_llm_qwen_smooth_quant_single_gpu_summary[qwen2_vl_7b_instruct-enable_ptpc-nb:4]": 285.463871661108,
"examples/test_qwen2audio.py::test_llm_qwen2audio_single_gpu[qwen2_audio_7b_instruct]": 423.2417808100581,
"examples/test_qwenvl.py::test_llm_qwenvl_single_gpu_summary[qwen-vl-chat]": 717.1786148559768,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]": 680.0001445889938622713,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-fp8-float16-enable_attn_plugin-enable_gemm_plugin]": 680.00012858898844569921,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-int4_awq-float16-enable_attn_plugin-enable_gemm_plugin]": 648.7579195387661,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-int8_sq-float16-enable_attn_plugin-enable_gemm_plugin]": 680.00033479504054412246,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-flax-no_paged_cache-disable_quant-float16-enable_attn_plugin-disable_gemm_plugin]": 286.15992603078485,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-no_paged_cache-disable_quant-float16-disable_attn_plugin-enable_gemm_plugin]": 680.0001984360278584063,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-no_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]": 680.00013055797899141908,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-use_paged_cache-disable_quant-float16-enable_attn_plugin-enable_gemm_plugin]": 680.0001333290128968656,
"examples/test_redrafter.py::test_llm_redrafter_1gpu[use_cpp_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb5-bs8]": 386.68252966180444,
"examples/test_redrafter.py::test_llm_redrafter_1gpu[use_cpp_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb8-bs8]": 204.7229775050655,
"examples/test_redrafter.py::test_llm_redrafter_1gpu[use_py_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb5-bs8]": 411.88197461143136,
"examples/test_redrafter.py::test_llm_redrafter_1gpu[use_py_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb8-bs8]": 429.239758990705,
"examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-disable_attention_plugin-disable_weight_only-float16-nb:1-use_python_runtime]": 327.95307156071067,
"examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_cpp_runtime]": 249.98457504063845,
"examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_python_runtime]": 225.60136043280363,
"examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-enable_attention_plugin-int4-float16-nb:1-use_cpp_runtime]": 114.39965145103633,
"examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-enable_attention_plugin-int8-float16-nb:1-use_cpp_runtime]": 109.14321990706958,
"examples/test_whisper.py::test_llm_whisper_general[large-v3-enable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_python_runtime]": 110.00021485507022589445,
"llmapi/test_llm_api_qa.py::TestLlmDefaultBackend::test_llm_args_logging": 458.4059731345624,
"llmapi/test_llm_api_qa.py::TestLlmDefaultBackend::test_llm_args_type_default": 40.70597969903611,
"llmapi/test_llm_api_qa.py::TestLlmDefaultBackend::test_llm_args_type_tensorrt": 99.26517963293009,
"llmapi/test_llm_e2e.py::test_llmapi_load_engine_from_build_command[llama-codellama/CodeLlama-7b-Instruct-hf]": 383.1450063039083,
"llmapi/test_llm_e2e.py::test_llmapi_load_engine_from_build_command[llama-llama-models/llama-7b-hf]": 101.96415167767555,
"llmapi/test_llm_examples.py::test_llmapi_server_example": 107.44753806525841,
"perf/test_perf.py::test_perf[bert_base-cpp-ootb-float16-bs:32-input_len:32]": 111.37450777366757,
"perf/test_perf.py::test_perf[bert_base-cpp-plugin-float16-bs:32-input_len:32]": 95.00738414749503,
"perf/test_perf.py::test_perf[gpt_350m-cppmanager-plugin_ifb-float16-bs:32-input_output_len:60": 99.74059158749878,
"perf/test_perf.py::test_perf[gpt_350m-cppmanager-plugin_ifb-float16-gwp:0.0-bs:32-input_output_len:60": 98.94526879303157,
"perf/test_perf.py::test_perf[gpt_350m-cppmanager-static_batching-plugin_ifb-float16-bs:32-input_output_len:60": 100.77929892018437,
"perf/test_perf.py::test_perf[roberta_base-cpp-plugin-float16-bs:32-input_len:128+512]": 140.2516261599958,
"test_cache.py::test_cache_sanity": 0.0006845169700682163,
"test_e2e.py::test_build_time_benchmark_sanity": 165.71592589840293,
"test_e2e.py::test_gpt3_175b_1layers_build_only": 131.34366285055876,
"test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding-]": 90.12235352798598,
"test_e2e.py::test_llama_e2e[use_py_session--]": 92.68217968731187,
"test_e2e.py::test_llama_e2e[use_py_session-remove_input_padding-]": 93.41917416104116,
"test_e2e.py::test_llmapi_chat_example": 105.19824166595936,
"test_e2e.py::test_llmapi_example_guided_decoding": 73.23708964884281,
"test_e2e.py::test_llmapi_example_inference": 66.82718145102262,
"test_e2e.py::test_llmapi_example_inference_async": 65.93082024902105,
"test_e2e.py::test_llmapi_example_inference_async_streaming": 67.49109892174602,
"test_e2e.py::test_llmapi_example_multilora": 72.87169548124075,
"test_e2e.py::test_llmapi_exit": 32.64902823418379,
"test_e2e.py::test_llmapi_load_ckpt_from_convert_command": 180.59318951144814,
"test_e2e.py::test_llmapi_load_engine_from_build_command[llama-codellama/CodeLlama-7b-Instruct-hf]": 243.36900701373816,
"test_e2e.py::test_llmapi_load_engine_from_build_command[llama-llama-models/llama-7b-hf]": 200.82293555140495,
"test_e2e.py::test_llmapi_load_engine_from_build_command_with_lora[llama-llama-models-v2/llama-v2-7b-hf]": 225.2778383679688,
"test_e2e.py::test_llmapi_quickstart": 66.53727849572897,
"test_e2e.py::test_llmapi_quickstart_atexit": 110.45052940770984,
"test_e2e.py::test_llmapi_server_example": 112.925546400249,
"test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding--]": 192.74050169810653,
"test_e2e.py::test_mistral_e2e[use_py_session---]": 160.08483010903,
"test_e2e.py::test_mistral_e2e[use_py_session-remove_input_padding--]": 157.39577213302255,
"test_e2e.py::test_mistral_large_hidden_vocab_size": 81.36711680702865,
"test_e2e.py::test_openai_chat_example": 876.1966922096908,
"test_e2e.py::test_openai_chat_guided_decoding": 55.12449237401597,
"test_e2e.py::test_openai_chat_harmony": 1162.7252594940364,
"test_e2e.py::test_openai_chat_multimodal_example": 215.8254322744906,
"test_e2e.py::test_openai_consistent_chat": 0.0001894170418381691,
"test_e2e.py::test_openai_misc_example": 256.0453990884125,
"test_e2e.py::test_openai_multi_chat_example": 684.1194127409253,
"test_e2e.py::test_ptp_quickstart": 45.67732956283726,
"test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-BF16-llama-3.1-model/Meta-Llama-3.1-8B]": 37.10870039300062,
"test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]": 81.43792725296225,
"test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-8B]": 56.05445321695879,
"test_e2e.py::test_ptp_quickstart_advanced[Llama3.2-11B-BF16-llama-3.2-models/Llama-3.2-11B-Vision]": 458.2611172522884,
"test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B]": 1198.8076391071081,
"test_e2e.py::test_ptp_quickstart_advanced_8gpus[Llama3.1-405B-FP8-llama-3.1-model/Llama-3.1-405B-Instruct-FP8]": 7200.0002610881347209215,
"test_e2e.py::test_ptp_quickstart_advanced_8gpus[Llama3.1-70B-BF16-llama-3.1-model/Meta-Llama-3.1-70B]": 2692.1981226399075,
"test_e2e.py::test_ptp_quickstart_advanced_8gpus[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8]": 2467.9094018582255,
"test_e2e.py::test_ptp_quickstart_advanced_8gpus[Mixtral-8x7B-BF16-Mixtral-8x7B-v0.1]": 1825.0858737968374,
"test_e2e.py::test_ptp_quickstart_advanced_8gpus[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1]": 1800.0002716332674026489,
"test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Maverick-17B-128E-Instruct-FP8-llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-False]": 7750.113898515003,
"test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Maverick-17B-128E-Instruct-FP8-llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-True]": 7404.8092977448832,
"test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Scout-17B-16E-Instruct-FP4-llama4-models/Llama-4-Scout-17B-16E-Instruct-FP4-True]": 3600.00027390988543629646,
"test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Scout-17B-16E-Instruct-FP8-llama4-models/Llama-4-Scout-17B-16E-Instruct-FP8-True]": 2183.7999707763083,
"test_e2e.py::test_ptp_quickstart_advanced_deepseek_r1_8gpus[DeepSeek-R1-DeepSeek-R1/DeepSeek-R1]": 8346.51794013707,
"test_e2e.py::test_ptp_quickstart_advanced_deepseek_r1_w4afp8_8gpus[DeepSeek-R1-W4AFP8-DeepSeek-R1/DeepSeek-R1-W4AFP8]": 11215.495469792979,
"test_e2e.py::test_ptp_quickstart_advanced_eagle3[Llama-3.1-8b-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct-EAGLE3-LLaMA3.1-Instruct-8B]": 109.26379436196294,
"test_e2e.py::test_ptp_quickstart_advanced_mixed_precision": 80.88908524392173,
"test_e2e.py::test_ptp_quickstart_advanced_mtp[DeepSeek-V3-Lite-BF16-DeepSeek-V3-Lite/bf16]": 99.42739840806462,
"test_e2e.py::test_ptp_quickstart_advanced_ngram[Llama-3.1-8B-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct]": 71.96910276100971,
"test_e2e.py::test_ptp_quickstart_bert[TRTLLM-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]": 21.019993914989755,
"test_e2e.py::test_ptp_quickstart_bert[VANILLA-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]": 18.753523574909195,
"test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-image-False]": 280.00024179508909583092,
"test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-video-False]": 278.4781197870616,
"test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-False]": 1206.060642259661,
"test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-True]": 101.94210673985071,
"test_e2e.py::test_ptp_quickstart_multimodal[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image-False]": 142.41076623182744,
"test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image-False]": 59.619638262083754,
"test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image-True]": 984.4696121218149,
"test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True]": 78.27158910990693,
"test_e2e.py::test_ptp_quickstart_multimodal_2gpu[Phi-4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct]": 128.000317517900839448,
"test_e2e.py::test_ptp_quickstart_multimodal_2gpu[gemma-3-27b-it-gemma/gemma-3-27b-it]": 66.45376745378599,
"test_e2e.py::test_ptp_quickstart_multimodal_2gpu[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503]": 55.03272026847117,
"test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image]": 46.110399533994496,
"test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image]": 67.88439144194126,
"test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image]": 360.00014796783216297626,
"test_e2e.py::test_ptp_quickstart_multimodal_chunked_prefill[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video]": 360.00016685109585523605,
"test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-0.8-image]": 354.18758003995754,
"test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-0.8-image]": 313.79139575595036,
"test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-image]": 47.84053734713234,
"test_e2e.py::test_ptp_quickstart_multimodal_kv_cache_reuse[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-0.8-video]": 58.646747548133135,
"test_e2e.py::test_ptp_quickstart_multimodal_multiturn[Phi-4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct]": 0.00018897396512329578,
"test_e2e.py::test_ptp_quickstart_multimodal_multiturn[gemma-3-27b-it-gemma/gemma-3-27b-it]": 81.0151858178433,
"test_e2e.py::test_ptp_quickstart_multimodal_multiturn[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503]": 60.082822071621194,
"test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[audio]": 89.50578387826681,
"test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[image]": 66.199569062097,
"test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[image_audio]": 62.389084175927565,
"test_e2e.py::test_ptp_scaffolding[DeepSeek-R1-Distill-Qwen-7B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-7B]": 7200.0001350759994238615,
"test_e2e.py::test_ptp_star_attention_example[Llama3.1-8B-BF16-llama-3.1-model/Meta-Llama-3.1-8B]": 3600.00020311586558818817,
"test_e2e.py::test_qwen_e2e_cpprunner_large_new_tokens[DeepSeek-R1-Distill-Qwen-1.5B-DeepSeek-R1-Distill-Qwen-1.5B]": 137.7278483910486,
"test_e2e.py::test_relaxed_acceptance_quickstart_advanced_deepseek_r1_8gpus[DeepSeek-R1-DeepSeek-R1/DeepSeek-R1]": 12134.278186964104,
"test_e2e.py::test_trtllm_bench_help_sanity[meta-llama/Llama-3.1-8B]": 109.25386995915323,
"test_e2e.py::test_trtllm_bench_iteration_log[PyTorch-non-streaming-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]": 163.86223009089008,
"test_e2e.py::test_trtllm_bench_iteration_log[PyTorch-streaming-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]": 114.50899445591494,
"test_e2e.py::test_trtllm_bench_iteration_log[TRT-non-streaming-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]": 285.3362849447876,
"test_e2e.py::test_trtllm_bench_iteration_log[TRT-streaming-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]": 310.9046222809702,
"test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-False-False]": 114.17938271397725,
"test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-hf-nvfp4-False-False]": 120.00014387606643140316,
"test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-instruct-hf-fp8-True-True]": 115.74023819994181,
"test_e2e.py::test_trtllm_bench_request_rate_and_concurrency[enable_concurrency-]": 276.64185731019825,
"test_e2e.py::test_trtllm_bench_request_rate_and_concurrency[enable_concurrency-enable_request_rate]": 252.97791706770658,
"test_e2e.py::test_trtllm_benchmark_serving[gpt_oss/gpt-oss-20b]": 482.31134233786725,
"test_e2e.py::test_trtllm_benchmark_serving[llama-3.1-model/Meta-Llama-3.1-8B]": 80.71726501686499,
"test_e2e.py::test_trtllm_multimodal_benchmark_serving": 360.00022564898245036602,
"test_e2e.py::test_trtllm_serve_example": 200.09309104084969,
"test_e2e.py::test_trtllm_serve_multimodal_example": 130.2214687075466,
"test_mode: Test mode (\"stress-test\" or \"stress-stage-alone\")\"": 1771.5283138155937,
"test_unittests.py::test_unittests_v2[unittest/_torch/attention/test_attention_mla.py]": 26.32902159006335,
"test_unittests.py::test_unittests_v2[unittest/_torch/attention]": 588.56,
"test_unittests.py::test_unittests_v2[unittest/_torch/auto_deploy/unit/singlegpu]": 539.3006387590431,
"test_unittests.py::test_unittests_v2[unittest/_torch/compilation]": 31.94,
"test_unittests.py::test_unittests_v2[unittest/_torch/debugger]": 36.69,
"test_unittests.py::test_unittests_v2[unittest/_torch/executor]": 170.86,
"test_unittests.py::test_unittests_v2[unittest/_torch/misc]": 600.5,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_llama\"]": 718.749935634085,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_mixtral\"]": 208.1838396479725,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_mllama\"]": 749.5508671940188,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_nemotron\"]": 1952.3731448464096,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_nemotron_nas\"]": 498.8839871880482,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_out_of_tree\"]": 55.078535287990235,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_qwen\"]": 551.1881373599754,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_qwen_moe\"]": 401.2630233000382,
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_vila\"]": 79.90315388399176,
"test_unittests.py::test_unittests_v2[unittest/_torch/modules]": 158.5,
"test_unittests.py::test_unittests_v2[unittest/_torch/multi_gpu_modeling -k \"deepseek\"]": 393.0210295501165,
"test_unittests.py::test_unittests_v2[unittest/_torch/multimodal]": 23.54,
"test_unittests.py::test_unittests_v2[unittest/_torch/sampler]": 107.66,
"test_unittests.py::test_unittests_v2[unittest/_torch/speculative]": 1850.16,
"test_unittests.py::test_unittests_v2[unittest/_torch/thop/parallel]": 311.58,
"test_unittests.py::test_unittests_v2[unittest/_torch/thop/serial]": 18.96,
"test_unittests.py::test_unittests_v2[unittest/api_stability]": 33.137137457728386,
"test_unittests.py::test_unittests_v2[unittest/bindings]": 1119.2564616799355,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_build_cache.py]": 34.61376368254423,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_executor.py]": 378.7100401185453,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm.py -m \"not part0\"]": 1883.5484512336552,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm.py -m \"part0\"]": 1601.0243577323854,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_models.py -m \"not (part0 or part1)\"]": 825.9972547292709,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_models.py -m \"part0\"]": 163.72848848626018,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_models.py -m \"part1\"]": 538.573951125145,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_perf_evaluator.py]": 118.36046380549669,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_pytorch.py::test_gemma3_1b_instruct_multi_lora]": 101.58543362899218,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_pytorch.py]": 539.5857984796166,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_quant.py]": 477.989566125907,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_utils.py]": 125.15857975929976,
"test_unittests.py::test_unittests_v2[unittest/test_model_runner_cpp.py]": 973.1355891097337,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_bert_attention.py]": 99.96196278184652,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"partition0\"]": 77.31474154582247,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"partition1\"]": 84.67568279313855,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"partition2\"]": 75.39135546097532,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"partition3\"]": 78.77339706313796,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"trtllm_gen\"]": 376.012343961047,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"xqa_generic\"]": 267.40264504775405,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention_IFB.py]": 85.18935105204582,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention_no_cache.py]": 49.3486054521054,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/llmapi/test_mpi_session.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/trt/python_plugin unittest/tools unittest/utils unittest/others]": 940.7867036014795,
"test_unittests.py::test_unittests_v2[unittest/trt/functional/test_fp4_gemm.py]": 302.49857676401734,
"test_unittests.py::test_unittests_v2[unittest/trt/functional/test_moe.py]": 220.60184395778924,
"test_unittests.py::test_unittests_v2[unittest/trt/functional]": 778.6451135131065,
"test_unittests.py::test_unittests_v2[unittest/trt/model/eagle]": 212.3223411180079,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"other\"]": 125.12117889150977,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"partition0\"]": 300.0489609502256,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"partition1\"]": 265.11671224981546,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"partition2\"]": 357.6496359631419,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"partition3\"]": 371.381394200027,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt_e2e.py]": 537.5006402550498,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_llama.py]": 1494.1103300452232,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_mamba.py]": 76.84791256207973,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_mistral.py]": 366.95385985821486,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_nemotron_nas.py -k \"not fp8\"]": 1041.1297603696585,
"test_unittests.py::test_unittests_v2[unittest/trt/model_api/test_model_api_multi_gpu.py]": 27.33125525712967,
"test_unittests.py::test_unittests_v2[unittest/trt/model_api/test_model_level_api.py]": 33.818626184016466,
"test_unittests.py::test_unittests_v2[unittest/trt/model_api/test_model_quantization.py]": 493.8186915554106,
"test_unittests.py::test_unittests_v2[unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py]": 214.35422350093722,
"test_unittests.py::test_unittests_v2[unittest/trt/quantization/test_weight_only_quant_matmul.py]": 100.81762219779193,
"test_unittests.py::test_unittests_v2[unittest/trt/quantization]": 673.2582192085683
}