diff --git a/tensorrt_llm/_torch/auto_deploy/models/patches/llama4.py b/tensorrt_llm/_torch/auto_deploy/models/patches/llama4.py
index 239cdf35af..e09d35fac9 100644
--- a/tensorrt_llm/_torch/auto_deploy/models/patches/llama4.py
+++ b/tensorrt_llm/_torch/auto_deploy/models/patches/llama4.py
@@ -104,7 +104,7 @@ def _forward_with_cond(
         return inputs_embeds
 
     def _no_vision_branch(inputs_embeds, pixel_values, input_ids):
-        return inputs_embeds
+        return inputs_embeds.clone()
 
     # decide by whether there is any non-zero pixel_values
     has_image: torch.Tensor = torch.any(pixel_values != 0)
diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_llama4_vlm_patch.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_llama4_vlm_patch.py
index 51ff13fac3..105edae4bd 100644
--- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_llama4_vlm_patch.py
+++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_llama4_vlm_patch.py
@@ -1,4 +1,3 @@
-import pytest
 import torch
 from _model_test_utils import get_small_model_config
 from build_and_run_ad import ExperimentConfig
@@ -9,7 +8,6 @@ from tensorrt_llm._torch.auto_deploy.export import apply_export_patches, torch_e
 from tensorrt_llm._torch.auto_deploy.transformations._graph import move_to_device
 
 
-@pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5505835")
 def test_build_run_llama4_vlm():
     atol = 1e-3
     rtol = 1e-3