mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
7.3 KiB
7.3 KiB
| 1 | unittest_case_name | gpu | parallel_factor | comment |
|---|---|---|---|---|
| 2 | unittest/trt/quantization | NVIDIA A10 | 18 | |
| 3 | unittest/trt/model/test_gptj.py | NVIDIA A10 | 5 | |
| 4 | unittest/trt/functional | NVIDIA A10 | 6 | |
| 5 | unittest/trt/model/test_gptneox.py | NVIDIA A10 | 2 | |
| 6 | unittest/trt/attention/test_bert_attention.py | NVIDIA A10 | 17 | |
| 7 | unittest/trt/model/test_falcon.py | NVIDIA A10 | 16 | |
| 8 | unittest/trt/model/test_gpt.py -k "partition2" | NVIDIA A10 | 11 | |
| 9 | unittest/trt/model/test_gpt.py -k "partition3" | NVIDIA A10 | 11 | |
| 10 | unittest/trt/model/test_gpt.py -k "other" | NVIDIA A10 | 13 | |
| 11 | unittest/trt/attention/test_gpt_attention_IFB.py | NVIDIA A10 | 17 | |
| 12 | unittest/trt/attention/test_gpt_attention_no_cache.py | NVIDIA A10 | 23 | |
| 13 | unittest/trt/model/test_mamba.py | NVIDIA A10 | 12 | |
| 14 | unittest/trt/model/test_llama.py | NVIDIA A10 | 3 | |
| 15 | unittest/trt/attention/test_gpt_attention.py -k "partition0" | NVIDIA A10 | 14 | |
| 16 | unittest/trt/attention/test_gpt_attention.py -k "partition1" | NVIDIA A10 | 10 | |
| 17 | unittest/trt/attention/test_gpt_attention.py -k "partition2" | NVIDIA A10 | 3 | |
| 18 | unittest/trt/attention/test_gpt_attention.py -k "partition3" | NVIDIA A10 | 3 | |
| 19 | unittest/trt/attention/test_gpt_attention.py -k "xqa_generic" | NVIDIA A10 | 2 | |
| 20 | unittest/trt/model/test_gpt.py -k "partition0" | NVIDIA A30 | 13 | |
| 21 | unittest/trt/model/test_gpt.py -k "partition1" | NVIDIA A30 | 13 | |
| 22 | unittest/trt/model/test_gpt.py -k "partition2" | NVIDIA A30 | 4 | |
| 23 | unittest/trt/model/test_gpt.py -k "partition3" | NVIDIA A30 | 4 | |
| 24 | unittest/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/llmapi/test_mpi_session.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/python_plugin unittest/tools unittest/utils unittest/others | NVIDIA A30 | 1 | |
| 25 | unittest/llmapi/test_llm_models.py -m "part0" | NVIDIA A30 | 1 | |
| 26 | unittest/llmapi/test_llm_models.py -m "part1" | NVIDIA A30 | 1 | |
| 27 | unittest/llmapi/test_llm_models.py -m "not (part0 or part1)" | NVIDIA A30 | 1 | |
| 28 | unittest/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/llmapi/test_mpi_session.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/python_plugin unittest/tools unittest/utils unittest/others | NVIDIA A100X | 4 | |
| 29 | llmapi-tp-2gpu | NVIDIA H100 80GB HBM3 | 1 | |
| 30 | unittest/llmapi/test_llm_models_multi_gpu.py | NVIDIA H100 80GB HBM3 | 1 | |
| 31 | unittest/trt/model/test_gptneox.py | NVIDIA H100 80GB HBM3 | 7 | |
| 32 | unittest/trt/attention/test_bert_attention.py | NVIDIA H100 80GB HBM3 | 11 | |
| 33 | unittest/trt/model_api/test_model_quantization.py | NVIDIA H100 80GB HBM3 | 3 | |
| 34 | model-bert | NVIDIA H100 80GB HBM3 | 11 | |
| 35 | unittest/trt/model/test_gpt_e2e.py | NVIDIA H100 80GB HBM3 | 12 | |
| 36 | unittest/bindings | NVIDIA H100 80GB HBM3 | 1 | |
| 37 | unittest/llmapi/test_llm_quant.py | NVIDIA H100 80GB HBM3 | 1 | |
| 38 | unittest/trt/attention/test_gpt_attention.py -k "xqa_generic" | NVIDIA H100 80GB HBM3 | 6 | |
| 39 | unittest/trt/functional/test_moe.py | NVIDIA H100 80GB HBM3 | 10 | |
| 40 | unittest/trt/quantization/test_weight_only_quant_matmul.py | NVIDIA H100 80GB HBM3 | 13 | |
| 41 | unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py | NVIDIA H100 80GB HBM3 | 13 | |
| 42 | unittest/trt/attention/test_gpt_attention_IFB.py | NVIDIA H100 80GB HBM3 | 11 | |
| 43 | unittest/trt/attention/test_gpt_attention_no_cache.py | NVIDIA H100 80GB HBM3 | 13 | |
| 44 | unittest/trt/model/test_mamba.py | NVIDIA H100 80GB HBM3 | 10 | |
| 45 | unittest/trt/attention/test_gpt_attention.py -k "partition0" | NVIDIA L40S | 14 | |
| 46 | unittest/trt/attention/test_gpt_attention.py -k "partition1" | NVIDIA L40S | 10 | |
| 47 | unittest/trt/attention/test_gpt_attention.py -k "partition2" | NVIDIA L40S | 6 | |
| 48 | unittest/trt/attention/test_gpt_attention.py -k "partition3" | NVIDIA L40S | 6 | |
| 49 | unittest/trt/attention/test_gpt_attention.py -k "xqa_generic" | NVIDIA L40S | 3 | |
| 50 | unittest/trt/functional | NVIDIA L40S | 32 | |
| 51 | llmapi-tp-2gpu | NVIDIA H100 PCIe | 1 | |
| 52 | unittest/llmapi/test_llm_models_multi_gpu.py | NVIDIA H100 PCIe | 1 | |
| 53 | unittest/trt/model/test_gptneox.py | NVIDIA H100 PCIe | 7 | |
| 54 | unittest/trt/attention/test_bert_attention.py | NVIDIA H100 PCIe | 11 | |
| 55 | unittest/trt/model_api/test_model_quantization.py | NVIDIA H100 PCIe | 3 | |
| 56 | model-bert | NVIDIA H100 PCIe | 11 | |
| 57 | unittest/trt/model/test_gpt_e2e.py | NVIDIA H100 PCIe | 12 | |
| 58 | unittest/bindings | NVIDIA H100 PCIe | 1 | |
| 59 | unittest/llmapi/test_llm_quant.py | NVIDIA H100 PCIe | 1 | |
| 60 | unittest/trt/attention/test_gpt_attention.py -k "xqa_generic" | NVIDIA H100 PCIe | 6 | |
| 61 | unittest/trt/functional/test_moe.py | NVIDIA H100 PCIe | 10 | |
| 62 | unittest/trt/quantization/test_weight_only_quant_matmul.py | NVIDIA H100 PCIe | 13 | |
| 63 | unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py | NVIDIA H100 PCIe | 13 | |
| 64 | unittest/trt/attention/test_gpt_attention_IFB.py | NVIDIA H100 PCIe | 11 | |
| 65 | unittest/trt/attention/test_gpt_attention_no_cache.py | NVIDIA H100 PCIe | 13 | |
| 66 | unittest/trt/model/test_mamba.py | NVIDIA H100 PCIe | 10 | |
| 67 | llmapi-tp-2gpu | NVIDIA H100 NVL | 1 | |
| 68 | unittest/llmapi/test_llm_models_multi_gpu.py | NVIDIA H100 NVL | 1 | |
| 69 | unittest/trt/model/test_gptneox.py | NVIDIA H100 NVL | 7 | |
| 70 | unittest/trt/attention/test_bert_attention.py | NVIDIA H100 NVL | 11 | |
| 71 | unittest/trt/model_api/test_model_quantization.py | NVIDIA H100 NVL | 3 | |
| 72 | model-bert | NVIDIA H100 NVL | 11 | |
| 73 | unittest/trt/model/test_gpt_e2e.py | NVIDIA H100 NVL | 12 | |
| 74 | unittest/bindings | NVIDIA H100 NVL | 1 | |
| 75 | unittest/llmapi/test_llm_quant.py | NVIDIA H100 NVL | 1 | |
| 76 | unittest/trt/attention/test_gpt_attention.py -k "xqa_generic" | NVIDIA H100 NVL | 6 | |
| 77 | unittest/trt/functional/test_moe.py | NVIDIA H100 NVL | 10 | |
| 78 | unittest/trt/quantization/test_weight_only_quant_matmul.py | NVIDIA H100 NVL | 13 | |
| 79 | unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py | NVIDIA H100 NVL | 13 | |
| 80 | unittest/trt/attention/test_gpt_attention_IFB.py | NVIDIA H100 NVL | 11 | |
| 81 | unittest/trt/attention/test_gpt_attention_no_cache.py | NVIDIA H100 NVL | 13 | |
| 82 | unittest/trt/model/test_mamba.py | NVIDIA H100 NVL | 10 | |
| 83 | llmapi-tp-2gpu | NVIDIA H100 | 1 | |
| 84 | unittest/llmapi/test_llm_models_multi_gpu.py | NVIDIA H100 | 1 | |
| 85 | unittest/trt/model/test_gptneox.py | NVIDIA H100 | 7 | |
| 86 | unittest/trt/attention/test_bert_attention.py | NVIDIA H100 | 11 | |
| 87 | unittest/trt/model_api/test_model_quantization.py | NVIDIA H100 | 3 | |
| 88 | model-bert | NVIDIA H100 | 11 | |
| 89 | unittest/trt/model/test_gpt_e2e.py | NVIDIA H100 | 12 | |
| 90 | unittest/bindings | NVIDIA H100 | 1 | |
| 91 | unittest/llmapi/test_llm_quant.py | NVIDIA H100 | 1 | |
| 92 | unittest/trt/attention/test_gpt_attention.py -k "xqa_generic" | NVIDIA H100 | 6 | |
| 93 | unittest/trt/functional/test_moe.py | NVIDIA H100 | 10 | |
| 94 | unittest/trt/quantization/test_weight_only_quant_matmul.py | NVIDIA H100 | 13 | |
| 95 | unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py | NVIDIA H100 | 13 | |
| 96 | unittest/trt/attention/test_gpt_attention_IFB.py | NVIDIA H100 | 11 | |
| 97 | unittest/trt/attention/test_gpt_attention_no_cache.py | NVIDIA H100 | 13 | |
| 98 | unittest/trt/model/test_mamba.py | NVIDIA H100 | 10 | |
| 99 | unittest/trt/attention/test_gpt_attention.py -k "partition0" | NVIDIA L40 | 14 | |
| 100 | unittest/trt/attention/test_gpt_attention.py -k "partition1" | NVIDIA L40 | 10 | |
| 101 | unittest/trt/attention/test_gpt_attention.py -k "partition2" | NVIDIA L40 | 6 | |
| 102 | unittest/trt/attention/test_gpt_attention.py -k "partition3" | NVIDIA L40 | 6 | |
| 103 | unittest/trt/attention/test_gpt_attention.py -k "xqa_generic" | NVIDIA L40 | 3 | |
| 104 | unittest/_torch/speculative | NVIDIA Graphics Device | 4 | B200 Bring Up Board |
| 105 | unittest/_torch/thop | NVIDIA Graphics Device | 32 | B200 Bring Up Board |
| 106 | unittest/_torch/auto_deploy/unit/singlegpu -k "not test_trtllm_bench_backend_comparison" | NVIDIA Graphics Device | 4 | B200 Bring Up Board |
| 107 | unittest/_torch/speculative | NVIDIA B200 | 4 | |
| 108 | unittest/_torch/thop | NVIDIA B200 | 32 | |
| 109 | unittest/_torch/auto_deploy/unit/singlegpu -k "not test_trtllm_bench_backend_comparison" | NVIDIA B200 | 4 | |