mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
tests: fix 5250460 (#4751)
Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>
This commit is contained in:
parent
de0613bd83
commit
1bc3dfa490
@@ -18,6 +18,7 @@ import os
|
||||
import pytest
|
||||
from defs.common import (convert_weights, generate_summary_cmd, venv_check_call,
|
||||
venv_mpi_check_call)
|
||||
+from defs.conftest import get_gpu_device_list
|
||||
from defs.trt_test_alternative import check_call
|
||||
|
||||
|
||||
@@ -30,6 +31,9 @@ def test_llm_commandr_v01_single_gpu_summary(commandr_example_root,
|
||||
llm_venv, cmodel_dir, engine_dir,
|
||||
use_weight_only):
|
||||
"Build & run commandr_v01 on single gpu."
|
||||
+    if "GH200" in get_gpu_device_list()[0] and not use_weight_only:
|
||||
+        pytest.skip("OOM on GH200. https://nvbugs/5250460")
|
||||
|
||||
print("Converting checkpoint...")
|
||||
dtype = 'float16'
|
||||
model_name = os.path.basename(llm_commandr_v01_model_root)
|
||||
|
||||
@@ -17,8 +17,9 @@ from pathlib import Path
|
||||
import pytest
|
||||
from defs.common import (generate_summary_cmd, test_multi_lora_support,
|
||||
venv_check_call)
|
||||
-from defs.conftest import (get_device_memory, skip_fp8_pre_ada,
|
||||
-                           skip_post_blackwell, skip_pre_hopper)
|
||||
+from defs.conftest import (get_device_memory, get_gpu_device_list,
|
||||
+                           skip_fp8_pre_ada, skip_post_blackwell,
|
||||
+                           skip_pre_hopper)
|
||||
from defs.trt_test_alternative import check_call
|
||||
|
||||
|
||||
@@ -224,6 +225,10 @@ def test_llm_gemma_1gpu_summary(batch_size, data_type, gemma_model_root,
|
||||
llm_venv, cmodel_dir, engine_dir,
|
||||
gemma_example_root, llm_datasets_root,
|
||||
llm_rouge_root, test_case):
|
||||
+    if "27b" in gemma_model_root and "GH200" in get_gpu_device_list(
|
||||
+    )[0] and "other" in test_case:
|
||||
+        pytest.skip("OOM on GH200. https://nvbugs/5250460")
|
||||
|
||||
gemma_1gpu_summary(batch_size, data_type, gemma_model_root, llm_venv,
|
||||
cmodel_dir, engine_dir, gemma_example_root,
|
||||
llm_datasets_root, llm_rouge_root, test_case)
|
||||
|
||||
@@ -418,8 +418,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpu
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[ep4-mtp_nextn=2-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5239087)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[-] SKIP (https://nvbugs/5234002)
|
||||
examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2-27b-it-fp8-bfloat16-8] SKIP (https://nvbugs/5234164)
|
||||
-full::GH200/examples/test_commandr.py::test_llm_commandr_v01_single_gpu_summary[disable_weight_only] SKIP (https://nvbugs/5250460)
|
||||
-full::GH200/examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-2-27b-it-other-bfloat16-8] SKIP (https://nvbugs/5250460)
|
||||
examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:1-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion] SKIP (https://nvbugs/5234058)
|
||||
examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:2-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion] SKIP (https://nvbugs/5234058)
|
||||
examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity] SKIP (https://nvbugs/5234058)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user