[Infra] - Minor clean-up and test Ubuntu mirrors (#4829)
Signed-off-by: Yanchao Lu <yanchaol@nvidia.com>
Signed-off-by: QI JUN <22017000+QiJune@users.noreply.github.com>
Co-authored-by: QI JUN <22017000+QiJune@users.noreply.github.com>
parent eb2d51a429
commit 8166649d03
@@ -314,7 +314,7 @@ def main(args):
         theta=pretrain_config.rotary_base,
     )

-    if batch_size == 0:
+    if batch_size == 0 or len(batch_input_ids) == 0:
         return [], [], [], {}
     input_lengths = [x.size(0) for x in batch_input_ids]

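The one-line change above hardens the early-exit guard: the declared batch size can be nonzero even when the list of input ids is empty, and only the new combined check returns early in that case. A minimal sketch of the behavior, assuming a torch-style API; prepare_inputs and its return shape are hypothetical stand-ins, not the patched file's real code:

import torch

def prepare_inputs(batch_input_ids, batch_size):
    # Patched guard: bail out when either the declared batch size is zero
    # or the id list itself is empty (e.g. every prompt was filtered out).
    if batch_size == 0 or len(batch_input_ids) == 0:
        return [], [], [], {}
    input_lengths = [x.size(0) for x in batch_input_ids]
    return batch_input_ids, input_lengths, [], {}

# batch_size is nonzero, yet there is nothing to run: the old
# `if batch_size == 0` check alone would not have returned early.
print(prepare_inputs([], batch_size=4))                  # ([], [], [], {})
print(prepare_inputs([torch.arange(5)], batch_size=1)[1])  # [5]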
@@ -1006,6 +1006,12 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
         ]
     }

+    if (env.testPhase2StageName) {
+        parameters += [
+            'testPhase2StageName': env.testPhase2StageName,
+        ]
+    }
+
     echo "trigger SBSA test job, params: ${parameters}"

     def status = triggerJob(
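The added Groovy block forwards testPhase2StageName to the downstream SBSA job only when the environment actually defines it, so unset values never reach triggerJob. A rough Python analogue of that pattern (the base key "stageName" is made up for the demonstration):

import os

def build_trigger_parameters(base_parameters):
    """Copy the base parameters, then conditionally add optional ones."""
    parameters = dict(base_parameters)
    # Mirrors `if (env.testPhase2StageName) { parameters += [...] }`:
    # missing or empty values are simply not forwarded.
    phase2 = os.environ.get("testPhase2StageName")
    if phase2:
        parameters["testPhase2StageName"] = phase2
    return parameters

print(build_trigger_parameters({"stageName": "Test-SBSA"}))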
@@ -1581,13 +1581,11 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
         "H100_PCIe-TensorRT-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 2],
         "H100_PCIe-TensorRT-[Post-Merge]-2": ["h100-cr", "l0_h100", 2, 2],
         "B200_PCIe-Triton-Python-[Post-Merge]-1": ["b100-ts2", "l0_b200", 1, 1],
         "DGX_H100-4_GPUs-TensorRT-[Post-Merge]-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
         // "A100_80GB_PCIE-TensorRT-Perf-1": ["a100-80gb-pcie", "l0_perf", 1, 1],
         "H100_PCIe-TensorRT-Perf-1": ["h100-cr", "l0_perf", 1, 1],
         "H100_PCIe-PyTorch-Perf-1": ["h100-cr", "l0_perf", 1, 1],
         "DGX_H200-8_GPUs-PyTorch-[Post-Merge]-1": ["dgx-h200-x8", "l0_dgx_h200", 1, 1, 8],
-        "DGX_H200-4_GPUs-PyTorch-[Post-Merge]-1": ["dgx-h200-x4", "l0_dgx_h200", 1, 2, 4],
-        "DGX_H200-4_GPUs-PyTorch-[Post-Merge]-2": ["dgx-h200-x4", "l0_dgx_h200", 2, 2, 4],
+        "DGX_H200-4_GPUs-PyTorch-[Post-Merge]-1": ["dgx-h200-x4", "l0_dgx_h200", 1, 1, 4],
         "DGX_H200-4_GPUs-TensorRT-[Post-Merge]-1": ["dgx-h200-x4", "l0_dgx_h200", 1, 1, 4],
     ]

     parallelJobs = x86TestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), {
@@ -1636,7 +1634,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     fullSet += SBSASlurmTestConfigs.keySet()

     if (env.targetArch == AARCH64_TRIPLE) {
-        parallelJobs = SBSASlurmTestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "arm64"), {
+        parallelJobs = SBSATestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "arm64"), {
             runLLMTestlistOnPlatform(pipeline, values[0], values[1], LINUX_AARCH64_CONFIG, false, key, values[2], values[3])
         }]]}
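Both collectEntries calls turn a config map of stage name -> [node, test list, split id, split count, optional GPU count] into one parallel job per entry; the Groovy elvis expression `values[4] ?: 1` defaults the GPU count to 1. A hedged Python sketch of the same mapping (make_pod_config and the dict keys are stand-ins, not the pipeline's real helpers):

# Two sample entries copied from the config map above.
TEST_CONFIGS = {
    "H100_PCIe-TensorRT-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 2],
    "DGX_H200-4_GPUs-PyTorch-[Post-Merge]-1": ["dgx-h200-x4", "l0_dgx_h200", 1, 1, 4],
}

def make_pod_config(node, arch, gpu_count, is_perf):
    # Hypothetical stand-in for createKubernetesPodConfig.
    return {"node": node, "arch": arch, "gpus": gpu_count, "perf": is_perf}

# Python analogue of `values[4] ?: 1` plus the key.contains("Perf") check.
parallel_jobs = {
    key: make_pod_config(values[0], "amd64",
                         values[4] if len(values) > 4 else 1,
                         "Perf" in key)
    for key, values in TEST_CONFIGS.items()
}
print(parallel_jobs["DGX_H200-4_GPUs-PyTorch-[Post-Merge]-1"]["gpus"])  # 4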
@@ -210,39 +210,3 @@ l0_dgx_h100:
   - examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:2-bfloat16-bs:1-cpp_e2e:False-nb:1]
   - examples/test_multimodal.py::test_llm_multimodal_general[llava-v1.6-mistral-7b-hf-vision-trtllm-pp:1-tp:2-float16-bs:1-cpp_e2e:False-nb:1]
   - deterministic/test_mixtral_deterministic.py::test_llm_mixtral_4gpus_deterministic[Mixtral-8x7B-Instruct-v0.1-float16]
-- condition:
-    ranges:
-      system_gpu_count:
-        gte: 4
-        lte: 4
-    wildcards:
-      gpu:
-      - '*h100*'
-      linux_distribution_name: ubuntu*
-  terms:
-    stage: post_merge
-    backend: tensorrt
-    auto_trigger: others
-  tests:
-  # ------------- TRT tests ---------------
-  - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
-  - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-RobertaForQuestionAnswering-bert/roberta-base-squad2]
-  - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:2-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion]
-  - examples/test_recurrentgemma.py::test_llm_recurrentgemma_2gpu[recurrentgemma-2b]
-  - examples/test_mamba.py::test_llm_mamba2_2gpu[mamba-codestral-7B-v0.1]
-  - accuracy/test_cli_flow.py::TestLlama2_7B::test_smooth_quant_ootb_tp2
-  - accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_awq_tp2
-  - accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_awq_prequantized_tp2
-  - accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_gptq_prequantized_tp2
-  - accuracy/test_cli_flow.py::TestLlama3_1_8B::test_tp4[disable_gemm_allreduce_plugin]
-  - accuracy/test_cli_flow.py::TestLlama3_1_8B::test_tp4[enable_gemm_allreduce_plugin]
-  - accuracy/test_cli_flow.py::TestLlama3_1_8B::test_fp8_rowwise_tp4[disable_gemm_allreduce_plugin]
-  - accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[disable_reduce_fusion-enable_fp8_context_fmha]
-  - accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[enable_reduce_fusion-disable_fp8_context_fmha]
-  - accuracy/test_cli_flow.py::TestPhi2::test_tp2
-  - llmapi/test_llm_e2e.py::test_llmapi_quant_llama_70b
-  - llmapi/test_llm_examples.py::test_llmapi_example_distributed_autopp_tp2
-  - examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float16-enable_gemm_plugin-enable_attention_plugin-disable_paged_kv_cache-tp:2-pp:2-nb:1-disable_fp8]
-  - examples/test_gpt.py::test_llm_gpt2_next_prompt_tuning[use_py_session-tp2]
-  - unittest/llmapi/apps/_test_openai_multi_gpu.py -m "part0"
-  - test_e2e.py::test_trtllm_bench_mgmn
@@ -9,6 +9,7 @@ l0_dgx_h200:
       gpu:
       - '*h200*'
+      linux_distribution_name: ubuntu*
       cpu: x86_64
   terms:
     stage: post_merge
     backend: pytorch
@@ -31,6 +32,7 @@ l0_dgx_h200:
       gpu:
       - '*h200*'
+      linux_distribution_name: ubuntu*
       cpu: x86_64
   terms:
     stage: post_merge
     backend: pytorch
@@ -89,3 +91,39 @@ l0_dgx_h200:
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp2pp2-fp8kv=True-attn_backend=FLASHINFER-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp2pp2-fp8kv=True-attn_backend=FLASHINFER-torch_compile=True]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[pp4-fp8kv=True-attn_backend=FLASHINFER-torch_compile=False]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 4
+        lte: 4
+    wildcards:
+      gpu:
+      - '*h200*'
+      linux_distribution_name: ubuntu*
+      cpu: x86_64
+  terms:
+    stage: post_merge
+    backend: tensorrt
+  tests:
+  # ------------- TRT tests ---------------
+  - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
+  - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-RobertaForQuestionAnswering-bert/roberta-base-squad2]
+  - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:2-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion]
+  - examples/test_recurrentgemma.py::test_llm_recurrentgemma_2gpu[recurrentgemma-2b]
+  - examples/test_mamba.py::test_llm_mamba2_2gpu[mamba-codestral-7B-v0.1]
+  - accuracy/test_cli_flow.py::TestLlama2_7B::test_smooth_quant_ootb_tp2
+  - accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_awq_tp2
+  - accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_awq_prequantized_tp2
+  - accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_gptq_prequantized_tp2
+  - accuracy/test_cli_flow.py::TestLlama3_1_8B::test_tp4[disable_gemm_allreduce_plugin]
+  - accuracy/test_cli_flow.py::TestLlama3_1_8B::test_tp4[enable_gemm_allreduce_plugin]
+  - accuracy/test_cli_flow.py::TestLlama3_1_8B::test_fp8_rowwise_tp4[disable_gemm_allreduce_plugin]
+  - accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[disable_reduce_fusion-enable_fp8_context_fmha]
+  - accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[enable_reduce_fusion-disable_fp8_context_fmha]
+  - accuracy/test_cli_flow.py::TestPhi2::test_tp2
+  - llmapi/test_llm_e2e.py::test_llmapi_quant_llama_70b
+  - llmapi/test_llm_examples.py::test_llmapi_example_distributed_autopp_tp2
+  - examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float16-enable_gemm_plugin-enable_attention_plugin-disable_paged_kv_cache-tp:2-pp:2-nb:1-disable_fp8]
+  - examples/test_gpt.py::test_llm_gpt2_next_prompt_tuning[use_py_session-tp2]
+  - unittest/llmapi/apps/_test_openai_multi_gpu.py -m "part0"
+  - test_e2e.py::test_trtllm_bench_mgmn
@@ -9,6 +9,7 @@ l0_gh200:
       gpu:
       - '*h200*'
+      linux_distribution_name: ubuntu*
       cpu: aarch64
   terms:
     stage: pre_merge
     backend: tensorrt
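Each of the YAML hunks above adds `linux_distribution_name: ubuntu*` to a condition's wildcards block, which, consistent with the commit title, pins these stages to Ubuntu runners. A minimal sketch of how such a condition could be evaluated, assuming fnmatch-style wildcards and case-insensitive matching; condition_matches and the machine dict are illustrative assumptions, not the test-db's real implementation:

from fnmatch import fnmatch

def condition_matches(condition, machine):
    # ranges: every numeric bound (gte/lte) must hold.
    for field, bounds in condition.get("ranges", {}).items():
        value = machine[field]
        if "gte" in bounds and value < bounds["gte"]:
            return False
        if "lte" in bounds and value > bounds["lte"]:
            return False
    # wildcards: every pattern (or any pattern in a list) must match.
    for field, pattern in condition.get("wildcards", {}).items():
        patterns = pattern if isinstance(pattern, list) else [pattern]
        if not any(fnmatch(str(machine[field]).lower(), p.lower())
                   for p in patterns):
            return False
    return True

condition = {
    "ranges": {"system_gpu_count": {"gte": 4, "lte": 4}},
    "wildcards": {"gpu": ["*h200*"],
                  "linux_distribution_name": "ubuntu*",
                  "cpu": "x86_64"},
}
machine = {"system_gpu_count": 4, "gpu": "NVIDIA H200",
           "linux_distribution_name": "ubuntu22.04", "cpu": "x86_64"}
print(condition_matches(condition, machine))  # True under these assumptions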
@@ -461,3 +461,9 @@ test_e2e.py::test_trtllm_bench_llmapi_launch[trt_backend-llama-v3-llama3-8b] SKI
 accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5303573)
 accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5303573)
 test_e2e.py::test_openai_multi_chat_example SKIP (https://nvbugs/5236980)
+test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8] SKIP (https://nvbugs/5318059)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5318087)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5318087)
+unittest/_torch/auto_deploy/integration/test_ad_build.py SKIP (https://nvbugs/5318103)
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=TRTLLM-torch_compile=False] SKIP (https://nvbugs/5318143)
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=TRTLLM-torch_compile=True] SKIP (https://nvbugs/5318143)
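The final hunk extends a waive list whose line format, as visible above, is "<test id> SKIP (<bug URL>)". A small illustrative parser for that format; the regex and function are assumptions drawn only from the lines shown, not the project's real tooling:

import re

# One "<test id> SKIP (<reason>)" entry per line, as in the hunk above.
WAIVE_LINE = re.compile(r'^(?P<test>.+?)\s+SKIP\s+\((?P<reason>[^)]+)\)$')

def parse_waive_line(line):
    match = WAIVE_LINE.match(line.strip())
    return (match.group("test"), match.group("reason")) if match else None

print(parse_waive_line(
    "test_e2e.py::test_openai_multi_chat_example SKIP (https://nvbugs/5236980)"
))
# -> ('test_e2e.py::test_openai_multi_chat_example', 'https://nvbugs/5236980')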