mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[fix][nvbug/5244009] Fix llama 4 test lists/scout accuracy issue (#4069)
[fix] Fix llama 4 test lists Signed-off-by: Mike Iovine <6158008+mikeiovine@users.noreply.github.com>
This commit is contained in:
parent
446f62bbab
commit
4b8ba7ad61
@ -119,13 +119,12 @@ class Attention(nn.Module):
|
||||
self.o_lora = LoraLayer([LoraModuleType.ATTENTION_DENSE],
|
||||
[self.hidden_size])
|
||||
|
||||
self.use_qk_norm = (
|
||||
config.pretrained_config
|
||||
and (config.pretrained_config.model_type == 'qwen3'
|
||||
or config.pretrained_config.model_type == 'qwen3_moe'))
|
||||
use_qk_norm = (config.pretrained_config and
|
||||
(config.pretrained_config.model_type == 'qwen3'
|
||||
or config.pretrained_config.model_type == 'qwen3_moe'))
|
||||
attn_cls = get_attention_backend(self.attn_backend)
|
||||
self.enable_rope_fusion = attn_cls.support_fused_rope(
|
||||
) and not self.use_qk_norm
|
||||
) and not use_qk_norm
|
||||
self.attn = create_attention(
|
||||
self.attn_backend,
|
||||
self.layer_idx,
|
||||
|
||||
@ -17,6 +17,4 @@ l0_dgx_h200:
|
||||
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[ep4--attention_dp-cuda_graph-overlap_scheduler]
|
||||
# - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] # OOM
|
||||
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[latency] # 1h
|
||||
- unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[tp8-pp1-trtllm-scout-enable_graph]
|
||||
- unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[tp1-ep1-pp8-trtllm-scout-enable_graph]
|
||||
# - unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[tp8-trtllm-maverick] # 3h will timeout
|
||||
- unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[pp1-ep1-enable_graph-tp8-trtllm-scout]
|
||||
|
||||
@ -472,8 +472,6 @@ examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padd
|
||||
examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:2-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion] SKIP (https://nvbugs/5234058)
|
||||
examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity] SKIP (https://nvbugs/5234058)
|
||||
examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-RobertaForQuestionAnswering-bert/roberta-base-squad2] SKIP (https://nvbugs/5234058)
|
||||
unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[tp1-ep1-pp8-trtllm-scout-enable_graph] SKIP (https://nvbugs/5260488)
|
||||
unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[tp8-pp1-trtllm-scout-enable_graph] SKIP (https://nvbugs/5260488)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[tp2pp2-mtp_nextn=2-fp8kv-attention_dp-cuda_graph-overlap_scheduler] SKIP (https://nvbugs/5261055, https://nvbugs/5170160)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[tp2pp2-mtp_nextn=2-attention_dp-cuda_graph-overlap_scheduler] SKIP (https://nvbugs/5261055, https://nvbugs/5170160)
|
||||
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[tp2pp2-mtp_nextn=2-attention_dp] SKIP (https://nvbugs/5261055, https://nvbugs/5170160)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user