mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[None][fix] Fix llama4 multimodal by skipping request validation (#6957)
Signed-off-by: Chang Liu (Enterprise Products) <9713593+chang-l@users.noreply.github.com>
This commit is contained in:
parent
0893afae3d
commit
75b8a90816
@ -1181,6 +1181,17 @@ class PyExecutor:
|
||||
|
||||
def _validate_request(self, request: LlmRequest):
|
||||
if isinstance(self.model_engine.model, DecoderModelForCausalLM):
|
||||
# Only skip token-range checks for Llama4 when the request has multimodal data
|
||||
from ..models.modeling_llama import Llama4ForConditionalGeneration
|
||||
if isinstance(self.model_engine.model,
|
||||
Llama4ForConditionalGeneration):
|
||||
has_mm = bool(request.py_multimodal_data)
|
||||
if has_mm:
|
||||
logger.debug(
|
||||
f"Skipping token-range validation for {type(self.model_engine.model).__name__} "
|
||||
"(multimodal request)")
|
||||
return
|
||||
|
||||
# FIXME: This check is necessary because of how Qwen2ForProcessRewardModel
|
||||
# subclasses DecoderModelForCausalLM. Perhaps the functionality
|
||||
# of DecoderModelForCausalLM reused by Qwen2ForProcessRewardModel
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
from utils.llm_data import llm_models_root
|
||||
|
||||
from tensorrt_llm import LLM, SamplingParams
|
||||
@ -43,19 +44,17 @@ def test_llama4(model_name, backend, tp_size, use_cuda_graph,
|
||||
"This is a very long prompt to exercise long context. Count up to 10000 from 1, 2, 3,"
|
||||
+ ", ".join(str(i) for i in range(4, 9000))
|
||||
},
|
||||
# TODO: Fix multimodal test.
|
||||
# {
|
||||
# "prompt": "<|image|>This image is of color",
|
||||
# "multi_modal_data": {
|
||||
# "image": [torch.ones(3, 1024, 1024)]
|
||||
# }
|
||||
# },
|
||||
{
|
||||
"prompt": "<|image|>This image is of color",
|
||||
"multi_modal_data": {
|
||||
"image": [torch.ones(3, 1024, 1024)]
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
expected_outputs = [
|
||||
" the head of state and head of government of the",
|
||||
", 9000, 9001, ",
|
||||
# " white. What is the color of the background of" # TODO: Fix multimodal test.
|
||||
" the head of state and head of government of the", ", 9000, 9001, ",
|
||||
" white. What is the color of the background of"
|
||||
]
|
||||
|
||||
pytorch_config = dict(attn_backend=backend)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user