mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[None][fix] Fix llama4 multimodal by skipping request validation (#6957)
Signed-off-by: Chang Liu (Enterprise Products) <9713593+chang-l@users.noreply.github.com>
This commit is contained in:
parent
0893afae3d
commit
75b8a90816
@ -1181,6 +1181,17 @@ class PyExecutor:
|
||||
|
||||
def _validate_request(self, request: LlmRequest):
|
||||
if isinstance(self.model_engine.model, DecoderModelForCausalLM):
|
||||
# Only skip token-range checks for Llama4 when the request has multimodal data
|
||||
from ..models.modeling_llama import Llama4ForConditionalGeneration
|
||||
if isinstance(self.model_engine.model,
|
||||
Llama4ForConditionalGeneration):
|
||||
has_mm = bool(request.py_multimodal_data)
|
||||
if has_mm:
|
||||
logger.debug(
|
||||
f"Skipping token-range validation for {type(self.model_engine.model).__name__} "
|
||||
"(multimodal request)")
|
||||
return
|
||||
|
||||
# FIXME: This check is necessary because of how Qwen2ForProcessRewardModel
|
||||
# subclasses DecoderModelForCausalLM. Perhaps the functionality
|
||||
# of DecoderModelForCausalLM reused by Qwen2ForProcessRewardModel
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
from utils.llm_data import llm_models_root
|
||||
|
||||
from tensorrt_llm import LLM, SamplingParams
|
||||
@ -43,19 +44,17 @@ def test_llama4(model_name, backend, tp_size, use_cuda_graph,
|
||||
"This is a very long prompt to exercise long context. Count up to 10000 from 1, 2, 3,"
|
||||
+ ", ".join(str(i) for i in range(4, 9000))
|
||||
},
|
||||
# TODO: Fix multimodal test.
|
||||
# {
|
||||
# "prompt": "<|image|>This image is of color",
|
||||
# "multi_modal_data": {
|
||||
# "image": [torch.ones(3, 1024, 1024)]
|
||||
# }
|
||||
# },
|
||||
{
|
||||
"prompt": "<|image|>This image is of color",
|
||||
"multi_modal_data": {
|
||||
"image": [torch.ones(3, 1024, 1024)]
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
expected_outputs = [
|
||||
" the head of state and head of government of the",
|
||||
", 9000, 9001, ",
|
||||
# " white. What is the color of the background of" # TODO: Fix multimodal test.
|
||||
" the head of state and head of government of the", ", 9000, 9001, ",
|
||||
" white. What is the color of the background of"
|
||||
]
|
||||
|
||||
pytorch_config = dict(attn_backend=backend)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user