diff --git a/tensorrt_llm/models/enc_dec/model.py b/tensorrt_llm/models/enc_dec/model.py
index 401bb8c1d4..338f16c54a 100644
--- a/tensorrt_llm/models/enc_dec/model.py
+++ b/tensorrt_llm/models/enc_dec/model.py
@@ -1617,7 +1617,7 @@ class DecoderModel(PretrainedModel):
                 shape=[-1, -1, -1],
                 dim_range=OrderedDict([
                     ('batch_size_beam_width', [bb_range]),
-                    ('query_len', [1]),
+                    ('query_len', [inlen_range]),
                     ('encoder_input_len_2', [encoder_input_len_range]),
                 ]),
             )
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index c3acdd547e..5a460788a7 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -291,7 +291,6 @@ full:B200/examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_
 examples/test_qwen.py::test_llm_qwen_moe_multi_gpu_summary[qwen2_57b_a14b-tp4pp1-context_fmha] SKIP (https://nvbugs/5063469)
 examples/test_qwen.py::test_llm_qwen_moe_multi_gpu_summary[qwen2_57b_a14b-tp2pp2-context_fmha_fp32_acc] SKIP (https://nvbugs/5063469)
 examples/test_mixtral.py::test_llm_mixtral_moe_plugin_fp8_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5064768)
-examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-disable_attention_plugin-disable_weight_only-float16-nb:1-use_python_runtime] SKIP (https://nvbugspro.nvidia.com/bug/5075538)
 examples/test_qwen.py::test_llm_qwen_7b_int8_kv_1node_1gpus[qwen2_vl_7b_instruct-enable_gemm_plugin-enable_weight_only] SKIP (https://nvbugs/5094621)
 examples/test_qwen.py::test_llm_qwen_int4_single_gpu_summary[qwen2.5_14b_instruct_int4-nb:4] SKIP (https://nvbugs/5094690)
 test_e2e.py::test_llmapi_build_command_parameters_align[llama-llama-models-v2/TinyLlama-1.1B-Chat-v1.0] SKIP (https://nvbugs/5061624)
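
Note on the model.py change: a dim_range entry of `[1]` pins the `query_len` dimension to a single token, so the engine's optimization profile for this decoder input only covers one-token (generation-phase) queries; passing `[inlen_range]` makes the dimension dynamic so longer query lengths are also accepted. The sketch below is a minimal, self-contained illustration of what that dim_range change expresses. It is not the actual `DecoderModel.prepare_inputs()` code: the concrete min/opt/max values and the print loop are assumptions added for illustration.

```python
# Minimal sketch of the dim_range change; values are illustrative assumptions,
# not the ones computed by DecoderModel.prepare_inputs().
from collections import OrderedDict

bb_range = [1, 4, 8]                      # batch_size * beam_width: min/opt/max
inlen_range = [1, 64, 512]                # decoder query length: min/opt/max
encoder_input_len_range = [1, 512, 1024]  # encoder sequence length: min/opt/max

# Before the fix, 'query_len' used [1], i.e. min = opt = max = 1, so only
# single-token queries fit the profile. Using [inlen_range] lets the same
# input tensor also take the full-length query of the context phase.
# In the real model this OrderedDict is passed to
# tensorrt_llm.functional.Tensor(..., shape=[-1, -1, -1], dim_range=...).
dim_range = OrderedDict([
    ('batch_size_beam_width', [bb_range]),
    ('query_len', [inlen_range]),          # was ('query_len', [1])
    ('encoder_input_len_2', [encoder_input_len_range]),
])

for name, ranges in dim_range.items():
    print(name, '-> profile ranges:', ranges)
```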