fix

Signed-off-by: Xiwen Yu <13230610+VALLIS-NERIA@users.noreply.github.com>
2026-01-14 06:27:45 +08:00 · 2025-09-10 10:34:18 +08:00 · 2025-09-10 10:34:18 +08:00 · 2e61526d12
commit 2e61526d12
parent 5f508b7d43
3 changed files with 10 additions and 8 deletions
--- a/jenkins/Build.groovy
+++ b/jenkins/Build.groovy
@ -573,8 +573,8 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
        "Build TRT-LLM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
            pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64 : CONFIG_LINUX_X86_64_VANILLA),
        // Disable the CUDA12 build because it is too slow (takes > 5 hours on SBSA)
-        // "Build TRT-LLM CUDA12": [LLM_DOCKER_IMAGE_CU12] + prepareLLMBuild(
-        //     pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_CU12 : CONFIG_LINUX_X86_64_VANILLA_CU12),
+        "Build TRT-LLM CUDA12": [LLM_DOCKER_IMAGE_CU12] + prepareLLMBuild(
+            pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_CU12 : CONFIG_LINUX_X86_64_VANILLA_CU12),
        "Build TRT-LLM LLVM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
            pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_LLVM : CONFIG_LINUX_X86_64_LLVM),
        "Build TRT-LLM Pybind": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
--- a/tensorrt_llm/_torch/auto_deploy/models/patches/mistral3.py
+++ b/tensorrt_llm/_torch/auto_deploy/models/patches/mistral3.py
@ -91,7 +91,7 @@ def _mistral_forward(
        pixel_values: torch.Tensor,
        image_sizes: Optional[torch.Tensor],
    ):
-        return inputs_embeds
+        return inputs_embeds.clone()

    def _vision_branch(
        # ! The type annotations in the original transformers code are all wrong.
--- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/test_ad_build_small_single.py
+++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/test_ad_build_small_single.py
@ -65,11 +65,11 @@ def _check_ad_config(experiment_config: ExperimentConfig, llm_args: LlmArgs):
            compile_backend="torch-simple",
        ),
        # disabled due to https://nvbugspro.nvidia.com/bug/5505835
-        # get_small_model_config(
-        #     "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-        #     attn_backend="flashinfer",
-        #     compile_backend="torch-simple",
-        # ),
+        get_small_model_config(
+            "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+            attn_backend="flashinfer",
+            compile_backend="torch-simple",
+        ),
        get_small_model_config(
            "deepseek-ai/DeepSeek-V3",
            attn_backend="triton",
@ -97,6 +97,8 @@ def _check_ad_config(experiment_config: ExperimentConfig, llm_args: LlmArgs):
    ],
 )
 def test_build_ad(experiment_config: Dict):
+    if "Llama-4" in experiment_config["args"]["model"]:
+        pytest.skip("https://nvbugspro.nvidia.com/bug/5505835")
    experiment_config["args"]["runtime"] = "demollm"  # Default runtime set to demollm
    experiment_config["args"]["world_size"] = 0  # Default world_size set to 0
    experiment_config = ExperimentConfig(**experiment_config)