diff --git a/tensorrt_llm/_torch/auto_deploy/models/patches/llama4.py b/tensorrt_llm/_torch/auto_deploy/models/patches/llama4.py
index 239cdf35af..e09d35fac9 100644
--- a/tensorrt_llm/_torch/auto_deploy/models/patches/llama4.py
+++ b/tensorrt_llm/_torch/auto_deploy/models/patches/llama4.py
@@ -104,7 +104,7 @@ def _forward_with_cond(
         return inputs_embeds
 
     def _no_vision_branch(inputs_embeds, pixel_values, input_ids):
-        return inputs_embeds
+        return inputs_embeds.clone()
 
     # decide by whether there is any non-zero pixel_values
     has_image: torch.Tensor = torch.any(pixel_values != 0)
diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_llama4_vlm_patch.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_llama4_vlm_patch.py
index 51ff13fac3..105edae4bd 100644
--- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_llama4_vlm_patch.py
+++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_llama4_vlm_patch.py
@@ -1,4 +1,3 @@
-import pytest
 import torch
 from _model_test_utils import get_small_model_config
 from build_and_run_ad import ExperimentConfig
@@ -9,7 +8,6 @@ from tensorrt_llm._torch.auto_deploy.export import apply_export_patches, torch_e
 from tensorrt_llm._torch.auto_deploy.transformations._graph import move_to_device
 
 
-@pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5505835")
 def test_build_run_llama4_vlm():
     atol = 1e-3
     rtol = 1e-3