TensorRT-LLMs/tests/integration/defs/agg_unit_mem_df.csv
QI JUN d38b8e3dd9
[None][ci] set TORCHINDUCTOR_COMPILE_THREADS for thop/parallel tests (#7489)
Signed-off-by: junq <22017000+QiJune@users.noreply.github.com>
2025-09-04 06:04:51 -07:00

7.7 KiB

1unittest_case_namegpuparallel_factorcomment
2unittest/trt/quantizationNVIDIA A1018
3unittest/trt/model/test_gptj.pyNVIDIA A105
4unittest/trt/functionalNVIDIA A106
5unittest/trt/model/test_gptneox.pyNVIDIA A102
6unittest/trt/attention/test_bert_attention.pyNVIDIA A1017
7unittest/trt/model/test_falcon.pyNVIDIA A1016
8unittest/trt/model/test_gpt.py -k "partition2"NVIDIA A1011
9unittest/trt/model/test_gpt.py -k "partition3"NVIDIA A1011
10unittest/trt/model/test_gpt.py -k "other"NVIDIA A1013
11unittest/trt/attention/test_gpt_attention_IFB.pyNVIDIA A1017
12unittest/trt/attention/test_gpt_attention_no_cache.pyNVIDIA A1023
13unittest/trt/model/test_mamba.pyNVIDIA A1012
14unittest/trt/model/test_llama.pyNVIDIA A103
15unittest/trt/attention/test_gpt_attention.py -k "partition0"NVIDIA A1014
16unittest/trt/attention/test_gpt_attention.py -k "partition1"NVIDIA A1010
17unittest/trt/attention/test_gpt_attention.py -k "partition2"NVIDIA A103
18unittest/trt/attention/test_gpt_attention.py -k "partition3"NVIDIA A103
19unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"NVIDIA A102
20unittest/trt/model/test_gpt.py -k "partition0"NVIDIA A3013
21unittest/trt/model/test_gpt.py -k "partition1"NVIDIA A3013
22unittest/trt/model/test_gpt.py -k "partition2"NVIDIA A304
23unittest/trt/model/test_gpt.py -k "partition3"NVIDIA A304
24unittest/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/llmapi/test_mpi_session.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/python_plugin unittest/tools unittest/utils unittest/othersNVIDIA A301
25unittest/llmapi/test_llm_models.py -m "part0"NVIDIA A301
26unittest/llmapi/test_llm_models.py -m "part1"NVIDIA A301
27unittest/llmapi/test_llm_models.py -m "not (part0 or part1)"NVIDIA A301
28unittest/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/llmapi/test_mpi_session.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/python_plugin unittest/tools unittest/utils unittest/othersNVIDIA A100X4
29llmapi-tp-2gpuNVIDIA H100 80GB HBM31
30unittest/llmapi/test_llm_models_multi_gpu.pyNVIDIA H100 80GB HBM31
31unittest/trt/model/test_gptneox.pyNVIDIA H100 80GB HBM37
32unittest/trt/attention/test_bert_attention.pyNVIDIA H100 80GB HBM311
33unittest/trt/model_api/test_model_quantization.pyNVIDIA H100 80GB HBM33
34model-bertNVIDIA H100 80GB HBM311
35unittest/trt/model/test_gpt_e2e.pyNVIDIA H100 80GB HBM312
36unittest/bindingsNVIDIA H100 80GB HBM31
37unittest/llmapi/test_llm_quant.pyNVIDIA H100 80GB HBM31
38unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"NVIDIA H100 80GB HBM36
39unittest/trt/functional/test_moe.pyNVIDIA H100 80GB HBM310
40unittest/trt/quantization/test_weight_only_quant_matmul.pyNVIDIA H100 80GB HBM313
41unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.pyNVIDIA H100 80GB HBM313
42unittest/trt/attention/test_gpt_attention_IFB.pyNVIDIA H100 80GB HBM311
43unittest/trt/attention/test_gpt_attention_no_cache.pyNVIDIA H100 80GB HBM313
44unittest/trt/model/test_mamba.pyNVIDIA H100 80GB HBM310
45unittest/trt/attention/test_gpt_attention.py -k "partition0"NVIDIA L40S14
46unittest/trt/attention/test_gpt_attention.py -k "partition1"NVIDIA L40S10
47unittest/trt/attention/test_gpt_attention.py -k "partition2"NVIDIA L40S6
48unittest/trt/attention/test_gpt_attention.py -k "partition3"NVIDIA L40S6
49unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"NVIDIA L40S3
50unittest/trt/functionalNVIDIA L40S32
51llmapi-tp-2gpuNVIDIA H100 PCIe1
52unittest/llmapi/test_llm_models_multi_gpu.pyNVIDIA H100 PCIe1
53unittest/trt/model/test_gptneox.pyNVIDIA H100 PCIe7
54unittest/trt/attention/test_bert_attention.pyNVIDIA H100 PCIe11
55unittest/trt/model_api/test_model_quantization.pyNVIDIA H100 PCIe3
56model-bertNVIDIA H100 PCIe11
57unittest/trt/model/test_gpt_e2e.pyNVIDIA H100 PCIe12
58unittest/bindingsNVIDIA H100 PCIe1
59unittest/llmapi/test_llm_quant.pyNVIDIA H100 PCIe1
60unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"NVIDIA H100 PCIe6
61unittest/trt/functional/test_moe.pyNVIDIA H100 PCIe10
62unittest/trt/quantization/test_weight_only_quant_matmul.pyNVIDIA H100 PCIe13
63unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.pyNVIDIA H100 PCIe13
64unittest/trt/attention/test_gpt_attention_IFB.pyNVIDIA H100 PCIe11
65unittest/trt/attention/test_gpt_attention_no_cache.pyNVIDIA H100 PCIe13
66unittest/trt/model/test_mamba.pyNVIDIA H100 PCIe10
67llmapi-tp-2gpuNVIDIA H100 NVL1
68unittest/llmapi/test_llm_models_multi_gpu.pyNVIDIA H100 NVL1
69unittest/trt/model/test_gptneox.pyNVIDIA H100 NVL7
70unittest/trt/attention/test_bert_attention.pyNVIDIA H100 NVL11
71unittest/trt/model_api/test_model_quantization.pyNVIDIA H100 NVL3
72model-bertNVIDIA H100 NVL11
73unittest/trt/model/test_gpt_e2e.pyNVIDIA H100 NVL12
74unittest/bindingsNVIDIA H100 NVL1
75unittest/llmapi/test_llm_quant.pyNVIDIA H100 NVL1
76unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"NVIDIA H100 NVL6
77unittest/trt/functional/test_moe.pyNVIDIA H100 NVL10
78unittest/trt/quantization/test_weight_only_quant_matmul.pyNVIDIA H100 NVL13
79unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.pyNVIDIA H100 NVL13
80unittest/trt/attention/test_gpt_attention_IFB.pyNVIDIA H100 NVL11
81unittest/trt/attention/test_gpt_attention_no_cache.pyNVIDIA H100 NVL13
82unittest/trt/model/test_mamba.pyNVIDIA H100 NVL10
83llmapi-tp-2gpuNVIDIA H1001
84unittest/llmapi/test_llm_models_multi_gpu.pyNVIDIA H1001
85unittest/trt/model/test_gptneox.pyNVIDIA H1007
86unittest/trt/attention/test_bert_attention.pyNVIDIA H10011
87unittest/trt/model_api/test_model_quantization.pyNVIDIA H1003
88model-bertNVIDIA H10011
89unittest/trt/model/test_gpt_e2e.pyNVIDIA H10012
90unittest/bindingsNVIDIA H1001
91unittest/llmapi/test_llm_quant.pyNVIDIA H1001
92unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"NVIDIA H1006
93unittest/trt/functional/test_moe.pyNVIDIA H10010
94unittest/trt/quantization/test_weight_only_quant_matmul.pyNVIDIA H10013
95unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.pyNVIDIA H10013
96unittest/trt/attention/test_gpt_attention_IFB.pyNVIDIA H10011
97unittest/trt/attention/test_gpt_attention_no_cache.pyNVIDIA H10013
98unittest/trt/model/test_mamba.pyNVIDIA H10010
99unittest/trt/attention/test_gpt_attention.py -k "partition0"NVIDIA L4014
100unittest/trt/attention/test_gpt_attention.py -k "partition1"NVIDIA L4010
101unittest/trt/attention/test_gpt_attention.py -k "partition2"NVIDIA L406
102unittest/trt/attention/test_gpt_attention.py -k "partition3"NVIDIA L406
103unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"NVIDIA L403
104unittest/_torch/attentionNVIDIA Graphics Device4B200 Bring Up Board
105unittest/_torch/miscNVIDIA Graphics Device4B200 Bring Up Board
106unittest/_torch/speculativeNVIDIA Graphics Device4B200 Bring Up Board
107unittest/_torch/thop/parallelNVIDIA Graphics Device16B200 Bring Up Board
108unittest/_torch/auto_deploy/unit/singlegpu -k "not test_trtllm_bench_backend_comparison"NVIDIA Graphics Device4B200 Bring Up Board
109unittest/_torch/attentionNVIDIA B2004
110unittest/_torch/miscNVIDIA B2004
111unittest/_torch/speculativeNVIDIA B2004
112unittest/_torch/thop/parallelNVIDIA B20016
113unittest/_torch/auto_deploy/unit/singlegpu -k "not test_trtllm_bench_backend_comparison"NVIDIA B2004
114unittest/_torch/attentionNVIDIA H1004
115unittest/_torch/miscNVIDIA H1004
116unittest/_torch/speculativeNVIDIA H1002
117unittest/_torch/thop/parallelNVIDIA H10016