diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor.py b/tensorrt_llm/_torch/pyexecutor/py_executor.py
index 4cebfae58b..a40b9b9045 100644
--- a/tensorrt_llm/_torch/pyexecutor/py_executor.py
+++ b/tensorrt_llm/_torch/pyexecutor/py_executor.py
@@ -1181,6 +1181,17 @@ class PyExecutor:
 
     def _validate_request(self, request: LlmRequest):
         if isinstance(self.model_engine.model, DecoderModelForCausalLM):
+            # Only skip token-range checks for Llama4 when the request has multimodal data
+            from ..models.modeling_llama import Llama4ForConditionalGeneration
+            if isinstance(self.model_engine.model,
+                          Llama4ForConditionalGeneration):
+                has_mm = bool(request.py_multimodal_data)
+                if has_mm:
+                    logger.debug(
+                        f"Skipping token-range validation for {type(self.model_engine.model).__name__} "
+                        "(multimodal request)")
+                    return
+
             # FIXME: This check is necessary because of how Qwen2ForProcessRewardModel
             # subclasses DecoderModelForCausalLM. Perhaps the functionality
             # of DecoderModelForCausalLM reused by Qwen2ForProcessRewardModel
diff --git a/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py b/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
index 5c374d0f2a..6149201d58 100644
--- a/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
+++ b/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
@@ -1,6 +1,7 @@
 from difflib import SequenceMatcher
 
 import pytest
+import torch
 from utils.llm_data import llm_models_root
 
 from tensorrt_llm import LLM, SamplingParams
@@ -43,19 +44,17 @@ def test_llama4(model_name, backend, tp_size, use_cuda_graph,
             "This is a very long prompt to exercise long context. Count up to 10000 from 1, 2, 3,"
             + ", ".join(str(i) for i in range(4, 9000))
         },
-        # TODO: Fix multimodal test. 
-        # {
-        #     "prompt": "<|image|>This image is of color",
-        #     "multi_modal_data": {
-        #         "image": [torch.ones(3, 1024, 1024)]
-        #     },
-        # },
+        {
+            "prompt": "<|image|>This image is of color",
+            "multi_modal_data": {
+                "image": [torch.ones(3, 1024, 1024)]
+            }
+        },
     ]
 
     expected_outputs = [
-        " the head of state and head of government of the",
-        ", 9000, 9001, ",
-        # " white. What is the color of the background of" # TODO: Fix multimodal test.
+        " the head of state and head of government of the", ", 9000, 9001, ",
+        " white. What is the color of the background of"
     ]
 
     pytorch_config = dict(attn_backend=backend)