mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
378 lines
44 KiB
Plaintext
{
|
|
"accuracy/test_cli_flow.py::TestGpt2::test_weight_only[int4]": 347.21782275289297,
|
|
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_smooth_quant": 190.484365709126,
|
|
"accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_int8_gptq": 578.4320518560708,
|
|
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm.py -m \"part0\"]": 1601.0243577323854,
|
|
"accuracy/test_cli_flow.py::TestPhi3Mini128kInstruct::test_auto_dtype": 500.89369447529316,
|
|
"accuracy/test_cli_flow.py::TestPhi3Mini4kInstruct::test_auto_dtype": 251.17038829252124,
|
|
"examples/test_llama.py::test_llm_llama_1gpu_batched_beam_search[llama-7b]": 182.20104870200157,
|
|
"examples/test_medusa.py::test_llm_medusa_1gpu[use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs1]": 217.8724013082683,
|
|
"examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-chunked_summarization_long]": 429.02448211982846,
|
|
"examples/test_qwen2audio.py::test_llm_qwen2audio_single_gpu[qwen2_audio_7b_instruct]": 423.2417808100581,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"other\"]": 125.12117889150977,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"partition1\"]": 265.11671224981546,
|
|
"examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]": 64.8964971601963,
|
|
"test_e2e.py::test_llmapi_example_guided_decoding": 73.23708964884281,
|
|
"test_e2e.py::test_llmapi_example_inference": 66.82718145102262,
|
|
"test_e2e.py::test_llmapi_example_inference_async": 65.93082024902105,
|
|
"test_e2e.py::test_llmapi_example_inference_async_streaming": 67.49109892174602,
|
|
"test_e2e.py::test_llmapi_example_multilora": 72.87169548124075,
|
|
"test_e2e.py::test_llmapi_quickstart": 66.53727849572897,
|
|
"accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_auto_dtype": 347.127849099983,
|
|
"accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_fp8": 261.4332031469967,
|
|
"accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4": 286.4440165119886,
|
|
"perf/test_perf.py::test_perf[bert_base-cpp-ootb-float16-bs:32-input_len:32]": 111.37450777366757,
|
|
"perf/test_perf.py::test_perf[bert_base-cpp-plugin-float16-bs:32-input_len:32]": 95.00738414749503,
|
|
"perf/test_perf.py::test_perf[gpt_350m-cppmanager-plugin_ifb-float16-bs:32-input_output_len:60]": 99.74059158749878,
|
|
"perf/test_perf.py::test_perf[gpt_350m-cppmanager-plugin_ifb-float16-gwp:0.0-bs:32-input_output_len:60]": 98.94526879303157,
|
|
"perf/test_perf.py::test_perf[gpt_350m-cppmanager-static_batching-plugin_ifb-float16-bs:32-input_output_len:60]": 100.77929892018437,
|
|
"perf/test_perf.py::test_perf[roberta_base-cpp-plugin-float16-bs:32-input_len:128+512]": 140.2516261599958,
|
|
"accuracy/test_cli_flow.py::TestGemma2_9BIt::test_auto_dtype": 725.8308991710655,
|
|
"accuracy/test_cli_flow.py::TestGpt2::test_attention_ootb": 448.54090467840433,
|
|
"accuracy/test_cli_flow.py::TestGpt2::test_auto_dtype": 376.7764785774052,
|
|
"accuracy/test_cli_flow.py::TestGpt2::test_context_fmha_disabled": 356.8461561538279,
|
|
"accuracy/test_cli_flow.py::TestGpt2::test_cuda_graph": 365.0803712736815,
|
|
"accuracy/test_cli_flow.py::TestGpt2::test_weight_streaming_ootb": 222.17091258615255,
|
|
"accuracy/test_cli_flow.py::TestGpt2Medium::test_fp8": 193.54337832704186,
|
|
"accuracy/test_cli_flow.py::TestLlama3_1_8B::test_autoq": 1058.2184530100785,
|
|
"accuracy/test_cli_flow.py::TestLlama3_1_8B::test_fp8": 368.3140486832708,
|
|
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_auto_dtype": 167.0847301799804,
|
|
"accuracy/test_cli_flow.py::TestStarcoder2_3B::test_auto_dtype": 221.9660275951028,
|
|
"accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=True-chunked_context=False-typical_acceptance=True]": 820.5789388604462,
|
|
"accuracy/test_cli_flow.py::TestVicuna7B::test_lookahead": 947.7913959696889,
|
|
"examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-disable_weight_only]": 273.7859199331142,
|
|
"examples/test_eagle.py::test_llm_eagle_1gpu[EAGLE-Vicuna-7B-v1.3-float16-bs1-eagle2]": 246.98607586231083,
|
|
"examples/test_eagle.py::test_llm_eagle_1gpu_modelopt_ckpt[llama3.1-eagle-8b-hf_v0.5-float16-bs8]": 489.4540088879876,
|
|
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-float16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8]": 2954.5586752621457,
|
|
"examples/test_granite.py::test_granite_bf16_lora[granite-3.0-1b-a400m-instruct]": 140.08338637300767,
|
|
"examples/test_llama.py::test_llama_3_x_fp8_with_bf16_lora[llama-v3-8b-instruct-hf]": 317.46144750900567,
|
|
"examples/test_medusa.py::test_llm_medusa_1gpu[use_py_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs8]": 524.8282293006778,
|
|
"examples/test_multimodal.py::test_llm_multimodal_general[Phi-4-multimodal-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 644.3520091949031,
|
|
"examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float16-bs:1-cpp_e2e:True-nb:1]": 492.22362083010375,
|
|
"examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:1-cpp_e2e:True-nb:1]": 333.81485258904286,
|
|
"examples/test_redrafter.py::test_llm_redrafter_1gpu[use_py_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb5-bs8]": 411.88197461143136,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/model_api/test_model_quantization.py]": 493.8186915554106,
|
|
"accuracy/test_cli_flow.py::TestGpt2::test_beam_search_large": 730.1395341157913,
|
|
"accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=False-chunked_context=False-typical_acceptance=False]": 422.75362031999975,
|
|
"test_mode: Test mode (\"stress-test\" or \"stress-stage-alone\")": 1771.5283138155937,
|
|
"test_e2e.py::test_gpt3_175b_1layers_build_only": 131.34366285055876,
|
|
"test_e2e.py::test_llmapi_chat_example": 105.19824166595936,
|
|
"test_e2e.py::test_trtllm_bench_request_rate_and_concurrency[enable_concurrency-]": 276.64185731019825,
|
|
"test_unittests.py::test_unittests_v2[unittest/api_stability]": 33.137137457728386,
|
|
"test_unittests.py::test_unittests_v2[unittest/bindings]": 1119.2564616799355,
|
|
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_build_cache.py]": 34.61376368254423,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_mistral.py]": 366.95385985821486,
|
|
"accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype": 429.80293437838554,
|
|
"examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1]": 383.3182801879011,
|
|
"examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_py_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1]": 293.03625723719597,
|
|
"test_unittests.py::test_unittests_v2[unittest/test_model_runner_cpp.py]": 973.1355891097337,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/model/eagle]": 212.3223411180079,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/model_api/test_model_api_multi_gpu.py]": 27.33125525712967,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/model_api/test_model_level_api.py]": 33.818626184016466,
|
|
"accuracy/test_cli_flow.py::TestQwen2_0_5BInstruct::test_weight_only": 399.7237217463553,
|
|
"accuracy/test_llm_api.py::TestQwen2_5_1_5BInstruct::test_weight_only": 137.54228180646896,
|
|
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[streaming-gpt2-use_cpp_session-use_logits-draft_len_4-float16-bs2]": 249.52418848499656,
|
|
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-bfloat16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 281.1201816312969,
|
|
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-byt5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 233.94542215764523,
|
|
"examples/test_granite.py::test_llm_granite[granite-3.0-1b-a400m-instruct-bfloat16]": 141.05149138718843,
|
|
"examples/test_multimodal.py::test_llm_multimodal_general[deplot-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 249.87254932150245,
|
|
"examples/test_multimodal.py::test_llm_multimodal_general[llava-v1.6-mistral-7b-hf-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 274.8723033480346,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_nemotron_nas.py -k \"not fp8\"]": 1041.1297603696585,
|
|
"accuracy/test_cli_flow.py::TestGpt2::test_weight_only[int8]": 358.4815372042358,
|
|
"examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_cpp_runtime]": 249.98457504063845,
|
|
"examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_python_runtime]": 225.60136043280363,
|
|
"examples/test_llama.py::test_llm_llama_v1_1gpu_kv_cache_reuse_with_prompt_table[llama-7b]": 167.92376559507102,
|
|
"examples/test_llama.py::test_llm_llama_v3_1_1node_single_gpu[llama-3.2-1b-disable_fp8]": 382.12588274572045,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"partition1\"]": 84.67568279313855,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"partition2\"]": 75.39135546097532,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"partition3\"]": 78.77339706313796,
|
|
"accuracy/test_cli_flow.py::TestGpt2::test_beam_search": 383.99572690576315,
|
|
"accuracy/test_cli_flow.py::TestLlama3_8BInstructGradient1048k::test_long_context": 713.1731983916834,
|
|
"accuracy/test_cli_flow.py::TestLlama3_8BInstructGradient1048k::test_long_context_ppl": 858.7944585508667,
|
|
"accuracy/test_cli_flow.py::TestMamba130M::test_auto_dtype": 144.6635948382318,
|
|
"accuracy/test_cli_flow.py::TestMinitron4BBase::test_fp8": 378.120541986078,
|
|
"accuracy/test_cli_flow.py::TestNemotronMini4BInstruct::test_fp8_prequantized": 208.21560259815305,
|
|
"accuracy/test_cli_flow.py::TestPhi2::test_auto_dtype": 284.1176424920559,
|
|
"accuracy/test_cli_flow.py::TestTinyLlama1_1BChat::test_float32": 171.85410665394738,
|
|
"accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=True-chunked_context=True-typical_acceptance=False]": 1072.9654933288693,
|
|
"accuracy/test_cli_flow.py::TestVicuna7B::test_eagle[cuda_graph=True-chunked_context=False-typical_acceptance=False]": 910.3428834918886,
|
|
"examples/test_eagle.py::test_llm_eagle_1gpu[EAGLE-Vicuna-7B-v1.3-float16-bs1-eagle1]": 254.24225717037916,
|
|
"examples/test_enc_dec.py::test_llm_enc_dec_mmlu[flan-t5-small-float32-tp:1-pp:1-nb:1-enable_fp8]": 1074.875556848012,
|
|
"examples/test_gpt.py::test_llm_minitron_fp8_with_pseudo_loras[4b]": 259.4826051471755,
|
|
"examples/test_granite.py::test_granite_bf16_lora[granite-3.0-2b-instruct]": 146.46410073013976,
|
|
"examples/test_mistral.py::test_llm_mistral_nemo_minitron_fp8_quantization[Mistral-NeMo-Minitron-8B-Instruct]": 464.4887059601024,
|
|
"examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 282.8564471802674,
|
|
"examples/test_multimodal.py::test_llm_multimodal_general[Phi-3.5-vision-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 275.5947739640251,
|
|
"examples/test_multimodal.py::test_llm_multimodal_general[llava-1.5-7b-hf-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 459.2980541479774,
|
|
"examples/test_redrafter.py::test_llm_redrafter_1gpu[use_cpp_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb5-bs8]": 386.68252966180444,
|
|
"examples/test_redrafter.py::test_llm_redrafter_1gpu[use_py_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb8-bs8]": 429.239758990705,
|
|
"examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-disable_attention_plugin-disable_weight_only-float16-nb:1-use_python_runtime]": 327.95307156071067,
|
|
"test_e2e.py::test_build_time_benchmark_sanity": 165.71592589840293,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_bert_attention.py]": 99.96196278184652,
|
|
"cpp/test_e2e.py::test_benchmarks[gpt-80]": 1376.0404928650241,
|
|
"accuracy/test_cli_flow.py::TestPhi3Small128kInstruct::test_auto_dtype": 512.450893450994,
|
|
"accuracy/test_llm_api.py::TestLlama3_1_8B::test_fp8_rowwise": 361.5573864541948,
|
|
"examples/test_llama.py::test_llm_llama_v3_dora_1gpu[commonsense-llama-v3-8b-dora-r32-llama-v3-8b-hf-base_fp16]": 517.2770831151865,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/quantization]": 673.2582192085683,
|
|
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-BertModel-bert/bert-base-uncased]": 121.36917966976762,
|
|
"test_e2e.py::test_llmapi_load_engine_from_build_command_with_lora[llama-llama-models-v2/llama-v2-7b-hf]": 225.2778383679688,
|
|
"test_e2e.py::test_openai_misc_example": 256.0453990884125,
|
|
"- fp16 and fp8 to test quantization": 289.5711575206369,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"xqa_generic\"]": 267.40264504775405,
|
|
"accuracy/test_cli_flow.py::TestLlama2_7B::test_weight_sparsity": 613.5882918275893,
|
|
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_weight_streaming[0.1]": 323.6820353940129,
|
|
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[streaming-gpt2-use_cpp_session-use_tokens-draft_len_4-float16-bs2]": 257.3995385244489,
|
|
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:2-disable_fp8]": 276.10329104214907,
|
|
"examples/test_multimodal.py::test_llm_multimodal_general[llava-v1.6-mistral-7b-hf-vision-trtllm-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]": 306.38610201328993,
|
|
"examples/test_prompt_lookup.py::test_llm_prompt_lookup_1gpu[streaming-gpt2-use_cpp_session-use_tokens-max_matching_ngram_size_2-prompt_lookup_num_tokens_8-float16-bs2]": 195.90045699477196,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"partition2\"]": 357.6496359631419,
|
|
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=eagle-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 413.903915906325,
|
|
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=eagle-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False]": 143.841789112892,
|
|
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16[attn_backend=FLASHINFER-torch_compile=False]": 307.12596721109,
|
|
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16[attn_backend=TRTLLM-torch_compile=True]": 166.85348949534819,
|
|
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=False-attn_backend=FLASHINFER-torch_compile=True]": 226.39608797896653,
|
|
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=True-attn_backend=TRTLLM-torch_compile=False]": 103.82129427790642,
|
|
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=True-attn_backend=TRTLLM-torch_compile=True]": 164.91815144987777,
|
|
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu[DeepSeek-V3-Lite-fp8]": 90.40784636512399,
|
|
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_llama_context_capacity[False-False-DeepSeek-V3-Lite-fp8/fp8]": 238.76137515995651,
|
|
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-True-DeepSeek-V3-Lite-fp8/fp8]": 67.32832619687542,
|
|
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[True-False-TinyLlama-1.1B-Chat-v1.0]": 46.302398771978915,
|
|
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[True-True-TinyLlama-1.1B-Chat-v1.0]": 38.81214914191514,
|
|
"test_unittests.py::test_unittests_v2[unittest/_torch -k \"not (modeling or multi_gpu or auto_deploy)\"]": 1186.6702785710804,
|
|
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_mixtral\"]": 208.1838396479725,
|
|
"test_unittests.py::test_unittests_v2[unittest/_torch/multi_gpu_modeling -k \"deepseek\"]": 393.0210295501165,
|
|
"cpp/test_e2e.py::test_model[-gpt_executor-80]": 4016.7569622844458,
|
|
"cpp/test_e2e.py::test_model[-gpt_tests-80]": 1817.8153839111328,
|
|
"cpp/test_unit_tests.py::test_unit_tests[executor-80]": 339.0683519244194,
|
|
"cpp/test_unit_tests.py::test_unit_tests[kernels-80]": 846.0403860099614,
|
|
"test_e2e.py::test_ptp_quickstart_bert[TRTLLM-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]": 21.019993914989755,
|
|
"test_e2e.py::test_ptp_quickstart_bert[VANILLA-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]": 18.753523574909195,
|
|
"test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-video-False]": 278.4781197870616,
|
|
"test_e2e.py::test_ptp_quickstart_multimodal[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image-False]": 142.41076623182744,
|
|
"test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image-False]": 113.76943837082945,
|
|
"test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-video-False]": 96.37042473605834,
|
|
"test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image-False]": 128.0739826040808,
|
|
"test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image-True]": 128.0739826040808,
|
|
"test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-False]": 97.74500772892497,
|
|
"test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-True]": 97.74500772892497,
|
|
"test_unittests.py::test_unittests_v2[unittest/_torch/auto_deploy/unit/singlegpu]": 539.3006387590431,
|
|
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_mllama\"]": 749.5508671940188,
|
|
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_nemotron_nas\"]": 498.8839871880482,
|
|
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_out_of_tree\"]": 55.078535287990235,
|
|
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_qwen\"]": 551.1881373599754,
|
|
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_vila\"]": 79.90315388399176,
|
|
"examples/test_gpt.py::test_llm_gpt2_next_prompt_tuning[use_cpp_session-tp1]": 460.1370678450912,
|
|
"examples/test_gpt.py::test_llm_gpt2_next_prompt_tuning[use_py_session-tp1]": 403.39630596572533,
|
|
"examples/test_medusa.py::test_llm_medusa_1gpu[use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs8]": 460.24718615040183,
|
|
"examples/test_redrafter.py::test_llm_redrafter_1gpu[use_cpp_session-redrafter-vicuna-7b-v1.3-bfloat16-dl5-nb8-bs8]": 204.7229775050655,
|
|
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_nemotron\"]": 1952.3731448464096,
|
|
"test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_qwen_moe\"]": 401.2630233000382,
|
|
"accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8_low_latency_gemm_plugin": 482.50407074484974,
|
|
"accuracy/test_cli_flow.py::TestLlama3_1_8BInstruct::test_fp8_prequantized": 171.8214656477794,
|
|
"accuracy/test_cli_flow.py::TestVicuna7B::test_medusa[cuda_graph=False]": 854.6058550588787,
|
|
"examples/test_enc_dec.py::test_llm_enc_dec_mmlu[flan-t5-small-float32-tp:1-pp:1-nb:1-disable_fp8]": 422.4394793640822,
|
|
"examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-2-9b-it]": 317.7816583644599,
|
|
"examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1]": 411.7690062429756,
|
|
"test_cache.py::test_cache_sanity": 0.0006845169700682163,
|
|
"test_e2e.py::test_trtllm_bench_help_sanity[meta-llama/Llama-3.1-8B]": 109.25386995915323,
|
|
"test_e2e.py::test_trtllm_bench_request_rate_and_concurrency[enable_concurrency-enable_request_rate]": 252.97791706770658,
|
|
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_quant.py]": 477.989566125907,
|
|
"accuracy/test_cli_flow.py::TestLlama7B::test_manage_weights": 536.7081215977669,
|
|
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[no_streaming-gpt2-use_cpp_session-use_logits-draft_len_4-float16-bs2]": 244.9744301661849,
|
|
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-float32-disable_gemm_plugin-enable_attention_plugin-disable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 218.02495155483484,
|
|
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-disable_gemm_plugin-disable_attention_plugin-disable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 218.7171499580145,
|
|
"examples/test_exaone.py::test_llm_exaone_1gpu[disable_weight_only-exaone_3.0_7.8b_instruct-float16-nb:4]": 374.5337073504925,
|
|
"examples/test_exaone.py::test_llm_exaone_1gpu[disable_weight_only-exaone_deep_2.4b-float16-nb:4]": 243.4259528592229,
|
|
"examples/test_gpt.py::test_llm_gpt2_medium_1gpu[non_streaming-use_cpp_session-enable_gemm_plugin]": 114.20040711760521,
|
|
"examples/test_gpt.py::test_llm_gpt2_medium_1gpu[streaming-use_cpp_session-enable_gemm_plugin]": 113.51056583970785,
|
|
"examples/test_gpt.py::test_llm_gpt2_medium_bad_words_1gpu[non_streaming-use_cpp_session]": 194.89961875230074,
|
|
"examples/test_gpt.py::test_llm_gpt2_medium_bad_words_1gpu[streaming-use_cpp_session]": 195.00627667084336,
|
|
"examples/test_gpt.py::test_llm_gpt2_medium_stop_words_1gpu[streaming-use_cpp_session]": 194.72326660901308,
|
|
"examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-summarization]": 603.6547773182392,
|
|
"examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-summarization_long]": 391.7267559207976,
|
|
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_py_session-recurrentgemma-2b-flax-no_paged_cache-disable_quant-float16-enable_attn_plugin-disable_gemm_plugin]": 286.15992603078485,
|
|
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_models.py -m \"part0\"]": 163.72848848626018,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt_e2e.py]": 537.5006402550498,
|
|
"accuracy/test_cli_flow.py::TestMixtral8x7B::test_nvfp4_prequantized": 471.08943115500733,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"trtllm_gen\"]": 376.012343961047,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/functional/test_fp4_gemm.py]": 302.49857676401734,
|
|
"accuracy/test_cli_flow.py::TestStarcoder2_15B::test_smooth_quant_ootb": 621.3599092587829,
|
|
"accuracy/test_cli_flow.py::TestTinyLlama1_1BChat::test_weight_only[int4]": 371.7965512983501,
|
|
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_executor.py]": 378.7100401185453,
|
|
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_models.py -m \"part1\"]": 538.573951125145,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/llmapi/test_mpi_session.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/trt/python_plugin unittest/tools unittest/utils unittest/others]": 940.7867036014795,
|
|
"accuracy/test_cli_flow.py::TestMinitron4BBase::test_auto_dtype": 189.79791952297091,
|
|
"accuracy/test_cli_flow.py::TestPhi3_5MiniInstruct::test_auto_dtype": 475.5876609608531,
|
|
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 217.96836187317967,
|
|
"examples/test_internlm.py::test_llm_internlm2_7b_1node_1gpu[bfloat16-enable_context_fmha-enable_gemm_plugin-enable_attention_plugin-nb:2]": 269.7412294782698,
|
|
"examples/test_multimodal.py::test_llm_multimodal_general[Qwen2-VL-7B-Instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:4]": 656.4784073680639,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"partition0\"]": 300.0489609502256,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt.py -k \"partition3\"]": 371.381394200027,
|
|
"accuracy/test_cli_flow.py::TestVicuna7B::test_medusa[cuda_graph=True]": 553.1062062960118,
|
|
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-enable_gemm_plugin]": 384.9690850973129,
|
|
"test_e2e.py::test_openai_chat_multimodal_example": 215.8254322744906,
|
|
"test_e2e.py::test_trtllm_serve_multimodal_example": 130.2214687075466,
|
|
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_utils.py]": 125.15857975929976,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention_no_cache.py]": 49.3486054521054,
|
|
"accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8_gemm_swiglu_plugin": 451.02113576978445,
|
|
"accuracy/test_cli_flow.py::TestLongAlpaca7B::test_multiblock_aggressive": 674.175037201494,
|
|
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8]": 498.1236152825877,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention_IFB.py]": 85.18935105204582,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py]": 214.35422350093722,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/quantization/test_weight_only_quant_matmul.py]": 100.81762219779193,
|
|
"accuracy/test_cli_flow.py::TestLlama2_7B::test_auto_dtype": 444.6878175288439,
|
|
"test_e2e.py::test_llmapi_exit": 32.64902823418379,
|
|
"test_e2e.py::test_llmapi_server_example": 112.925546400249,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/functional]": 778.6451135131065,
|
|
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_mamba.py]": 76.84791256207973,
|
|
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=disable-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 506.1045090719126,
|
|
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=disable-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False]": 184.20976317999884,
|
|
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=eagle-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 202.37037238897756,
|
|
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=eagle-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]": 246.64391099987552,
|
|
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=False-attn_backend=TRTLLM-torch_compile=True]": 313.69273760309443,
|
|
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=True-attn_backend=FLASHINFER-torch_compile=False]": 409.8932851999998,
|
|
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=True-attn_backend=FLASHINFER-torch_compile=True]": 344.8807112099603,
|
|
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu[DeepSeek-V3-Lite-fp8]": 224.28071974776685,
|
|
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[False-True-DeepSeek-V3-Lite-fp8/fp8]": 77.51831256924197,
|
|
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-False-DeepSeek-V3-Lite-fp8/fp8]": 99.81417108187452,
|
|
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[False-False-TinyLlama-1.1B-Chat-v1.0]": 48.16434509307146,
|
|
"test_e2e.py::test_trtllm_bench_iteration_log[PyTorch-non-streaming-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]": 163.86223009089008,
|
|
"test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-instruct-hf-fp8-True-True]": 115.74023819994181,
|
|
"test_unittests.py::test_unittests_v2[unittest/_torch -k \"modeling_llama\"]": 718.749935634085,
|
|
"accuracy/test_cli_flow.py::TestGpt2::test_int8_kv_cache": 399.65961667895317,
|
|
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_int4_awq_int8_kv_cache": 392.90223736315966,
|
|
"accuracy/test_cli_flow.py::TestQwen2_7BInstruct::test_int4_awq_prequantized": 604.7383968606591,
|
|
"accuracy/test_cli_flow.py::TestTinyLlama1_1BChat::test_weight_only[int8]": 159.531545445323,
|
|
"accuracy/test_cli_flow.py::TestTinyLlama1_1BChat::test_weight_only_int8_kv_cache[int8]": 184.35870655626059,
|
|
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_models.py -m \"not (part0 or part1)\"]": 825.9972547292709,
|
|
"cpp/test_e2e.py::test_benchmarks[bart-90]": 271.95234084688127,
|
|
"cpp/test_e2e.py::test_model[-bart-90]": 391.84748707409017,
|
|
"cpp/test_e2e.py::test_model[-eagle-86]": 850.5158995762467,
|
|
"cpp/test_e2e.py::test_model[-mamba-86]": 893.8684413917363,
|
|
"cpp/test_e2e.py::test_model[-medusa-86]": 577.0913726426661,
|
|
"cpp/test_e2e.py::test_model[-redrafter-86]": 356.56682327389717,
|
|
"cpp/test_e2e.py::test_benchmarks[t5-90]": 244.83684724476188,
"cpp/test_e2e.py::test_model[-enc_dec_language_adapter-90]": 356.5558080910705,
"cpp/test_e2e.py::test_model[-t5-90]": 167.93334361724555,
"cpp/test_e2e.py::test_model[fp8-llama-90]": 810.9923318810761,
"cpp/test_unit_tests.py::test_unit_tests[batch_manager-90]": 230.49758478673175,
"cpp/test_unit_tests.py::test_unit_tests[common-90]": 12.953252204693854,
"cpp/test_unit_tests.py::test_unit_tests[executor-90]": 317.7621980938129,
"cpp/test_unit_tests.py::test_unit_tests[kernels-90]": 554.3154804841615,
"cpp/test_unit_tests.py::test_unit_tests[layers-90]": 1288.0563295381144,
"cpp/test_unit_tests.py::test_unit_tests[runtime-90]": 876.2420815587975,
"cpp/test_unit_tests.py::test_unit_tests[thop-90]": 2.2652571727521718,
"cpp/test_unit_tests.py::test_unit_tests[utils-90]": 3.6415831856429577,
"accuracy/test_cli_flow.py::TestLlama3_1_8B::test_fp8_rowwise_meta_recipe": 634.7149123200215,
"examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-llama-v2-13b-hf-lora_fp16-base_fp16]": 895.7611340929288,
"examples/test_nemotron_nas.py::test_nemotron_nas_summary_1gpu[DeciLM-7B]": 335.41048416192643,
"examples/test_phi.py::test_llm_phi_lora_1gpu[Phi-3-mini-4k-instruct-ru-lora-Phi-3-mini-4k-instruct-lora_fp16-base_fp16]": 217.61977925198153,
"cpp/test_e2e.py::test_model[-gpt-80]": 2498.94351779297,
"cpp/test_unit_tests.py::test_unit_tests[batch_manager-80]": 380.8567730002105,
"cpp/test_unit_tests.py::test_unit_tests[common-80]": 20.237869411706924,
"cpp/test_unit_tests.py::test_unit_tests[layers-80]": 2141.2598778679967,
"cpp/test_unit_tests.py::test_unit_tests[runtime-80]": 1491.7047495394945,
"cpp/test_unit_tests.py::test_unit_tests[thop-80]": 3.3458465598523617,
"cpp/test_unit_tests.py::test_unit_tests[utils-80]": 5.461210697889328,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8_gemm_plugin": 593.3573900908232,
"examples/test_gemma.py::test_llm_gemma_1gpu_summary_vswa[gemma-3-1b-it-other-bfloat16-8]": 195.3050664511975,
"test_e2e.py::test_llmapi_quickstart_atexit": 110.45052940770984,
"disaggregated/test_disaggregated.py::test_disaggregated_mixed[TinyLlama-1.1B-Chat-v1.0]": 67.3897166326642,
"disaggregated/test_disaggregated.py::test_disaggregated_overlap[TinyLlama-1.1B-Chat-v1.0]": 98.97588296607137,
"disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]": 67.9668476767838,
"test_unittests.py::test_unittests_v2[unittest/_torch/test_attention_mla.py]": 26.32902159006335,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 591.2785023800097,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]": 306.84709841990843,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 220.57452515885234,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]": 202.22269394202158,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False]": 165.08514453098178,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]": 252.70569713797886,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False]": 85.24235329206567,
"test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]": 81.43792725296225,
"test_e2e.py::test_ptp_quickstart_advanced_eagle3[Llama-3.1-8b-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct-EAGLE3-LLaMA3.1-Instruct-8B]": 109.26379436196294,
"test_e2e.py::test_ptp_quickstart_advanced_mixed_precision": 80.88908524392173,
"test_e2e.py::test_ptp_quickstart_advanced_mtp[DeepSeek-V3-Lite-BF16-DeepSeek-V3-Lite/bf16]": 99.42739840806462,
"test_unittests.py::test_unittests_v2[unittest/_torch/speculative/test_eagle3.py]": 317.8708840459585,
"accuracy/test_cli_flow.py::TestLlama7B::test_auto_dtype": 402.75543826818466,
"examples/test_bert.py::test_llm_bert_general[compare_hf-disable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:1-pp:1-float32-BertModel-bert/bert-base-uncased]": 111.17977902293205,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-130m-float16-enable_gemm_plugin]": 112.04011878371239,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-disable_gemm_plugin]": 129.8332964628935,
"- t5_base for t5 baseline.\"": 197.2081038095057,
"test_e2e.py::test_llmapi_load_ckpt_from_convert_command": 180.59318951144814,
"test_e2e.py::test_llmapi_load_engine_from_build_command[llama-codellama/CodeLlama-7b-Instruct-hf]": 243.36900701373816,
"accuracy/test_cli_flow.py::TestLlama7B::test_streamingllm": 601.9727729707956,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-enable_gemm_plugin]": 118.37521690130234,
"test_e2e.py::test_llmapi_load_engine_from_build_command[llama-llama-models/llama-7b-hf]": 200.82293555140495,
"test_unittests.py::test_unittests_v2[unittest/trt/model/test_llama.py]": 1494.1103300452232,
"test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"partition0\"]": 77.31474154582247,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 295.3527018489549,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]": 143.84012729604729,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False]": 107.58471493399702,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]": 205.7252635700861,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 113.82226522010751,
"examples/test_llama.py::test_llm_llama_1gpu[llama-3.1-8b-instruct-hf-fp8-enable_fp8-float16-summarization-nb:1]": 853.2910006027669,
"test_e2e.py::test_openai_chat_example": 876.1966922096908,
"test_e2e.py::test_trtllm_serve_example": 200.09309104084969,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_perf_evaluator.py]": 118.36046380549669,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm_pytorch.py]": 539.5857984796166,
"accuracy/test_cli_flow.py::TestPhi3Small8kInstruct::test_auto_dtype": 306.3908146258909,
"examples/test_llama.py::test_llm_llama_1gpu_fp8_kv_cache[llama-v2-7b-hf-bfloat16]": 313.6555140609853,
"examples/test_qwen.py::test_llm_qwen1_5_7b_single_gpu_lora[qwen1.5_7b_chat-Qwen1.5-7B-Chat-750Mb-lora]": 338.59182655182667,
"accuracy/test_llm_api.py::TestQwen2_7BInstruct::test_weight_only": 594.9357111975551,
"examples/test_draft_target_model.py::test_llm_draft_target_model_1gpu[no_streaming-gpt2-use_cpp_session-use_tokens-draft_len_4-float16-bs2]": 241.73137632384896,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-bfloat16-enable_gemm_plugin-enable_attention_plugin-disable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 232.2934926636517,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-float32-disable_gemm_plugin-disable_attention_plugin-disable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]": 220.32321695238352,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:2-disable_fp8]": 205.83720442652702,
"examples/test_exaone.py::test_llm_exaone_1gpu[disable_weight_only-exaone_3.0_7.8b_instruct-float16-nb:1]": 302.19654186069965,
"examples/test_gpt.py::test_llm_gpt2_medium_bad_words_1gpu[non_streaming-use_py_session]": 200.52475621178746,
"examples/test_gpt.py::test_llm_gpt2_medium_stop_words_1gpu[non_streaming-use_cpp_session]": 194.90547297894955,
"examples/test_gpt.py::test_llm_gpt2_medium_stop_words_1gpu[non_streaming-use_py_session]": 194.89357279613614,
"examples/test_granite.py::test_llm_granite[granite-3.0-2b-instruct-bfloat16]": 155.801738537848,
"examples/test_llama.py::test_llm_llama_v2_1gpu_auto_parallel[llama-v2-7b-hf]": 535.973838724196,
"examples/test_prompt_lookup.py::test_llm_prompt_lookup_1gpu[no_streaming-gpt2-use_cpp_session-use_tokens-max_matching_ngram_size_2-prompt_lookup_num_tokens_8-float16-bs2]": 196.1214354224503,
"examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-int4_awq-float16-enable_attn_plugin-enable_gemm_plugin]": 648.7579195387661,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_smooth_quant_ootb": 457.93785213679075,
"accuracy/test_cli_flow.py::TestLlama3_2_1B::test_smooth_quant_ootb_manage_weights": 216.66169160604477,
"accuracy/test_cli_flow.py::TestTinyLlama1_1BChat::test_weight_only_manage_weights[int4]": 161.57166086137295,
"test_unittests.py::test_unittests_v2[unittest/llmapi/test_llm.py -m \"not part0\"]": 1883.5484512336552,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 89.92349556891713,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]": 175.661773331929,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 90.21807348495349,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4": 56.31924073398113,
"test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-8B]": 56.05445321695879,
"test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-False-False]": 114.17938271397725,
"examples/test_llama.py::test_llama_3_x_fp8_with_bf16_lora[llama-3.2-1b]": 117.10959041584283,
"examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1]": 385.0684349639341,
"examples/test_qwen.py::test_llm_hf_qwen_multi_lora_1gpu[qwen2.5_1.5b_instruct]": 193.38715927954763,
"test_e2e.py::test_trtllm_bench_iteration_log[TRT-streaming-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]": 310.9046222809702,
"test_unittests.py::test_unittests_v2[unittest/trt/functional/test_moe.py]": 220.60184395778924,
"examples/test_bert.py::test_llm_bert_general[compare_hf-disable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:1-pp:1-float32-RobertaModel-bert/roberta-base]": 115.47540166974068,
"examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-RobertaModel-bert/roberta-base]": 122.99858937039971,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-130m-float16-disable_gemm_plugin]": 136.8141469657421,
"examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-disable_gemm_plugin]": 405.1586506664753,
"test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding--]": 192.74050169810653,
"test_e2e.py::test_mistral_e2e[use_py_session---]": 160.08483010903,
"test_e2e.py::test_mistral_e2e[use_py_session-remove_input_padding--]": 157.39577213302255,
"accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8": 510.68390227202326,
"examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8]": 568.2032693652436,
"examples/test_llama.py::test_llama_3_x_fp8_with_bf16_lora[llama-3.1-8b]": 160.33107751235366,
"examples/test_multimodal.py::test_llm_fp8_multimodal_general[fp8-fp8-cnn_dailymail-Qwen2-VL-7B-Instruct-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False]": 332.0248579243198,
"test_e2e.py::test_mistral_large_hidden_vocab_size": 81.36711680702865,
"test_e2e.py::test_trtllm_bench_iteration_log[TRT-non-streaming-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]": 285.3362849447876,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=disable-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]": 647.6109309499152,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[-attention_dp-cuda_graph-overlap_scheduler-torch_compile=False]": 326.1317654890008,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=disable-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]": 226.01353620411828,
"accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=eagle-attention_dp-cuda_graph-overlap_scheduler-torch_compile=False]": 336.02580665098503,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16[attn_backend=FLASHINFER-torch_compile=True]": 443.91388061689213,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16[attn_backend=TRTLLM-torch_compile=False]": 191.10617867391557,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=False-attn_backend=FLASHINFER-torch_compile=False]": 237.24446990108117,
"accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=False-attn_backend=TRTLLM-torch_compile=False]": 174.38962662010454,
"accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency]": 324.3035402488895,
"accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_fp8_block_scales[latency]": 149.19146074401215,
"disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp[DeepSeek-V3-Lite-fp8]": 124.17078560194932,
"disaggregated/test_disaggregated.py::test_disaggregated_load_balance[TinyLlama-1.1B-Chat-v1.0]": 73.48997121001594,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[False-False-DeepSeek-V3-Lite-fp8/fp8]": 78.98068026197143,
"disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[False-True-TinyLlama-1.1B-Chat-v1.0]": 36.88020430901088,
"test_e2e.py::test_trtllm_bench_iteration_log[PyTorch-streaming-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]": 114.50899445591494
}