From d736c7f2908e69c47988108d7dff49770bf29719 Mon Sep 17 00:00:00 2001 From: Lucas Liebenwein <11156568+lucaslie@users.noreply.github.com> Date: Wed, 7 Jan 2026 20:16:53 -0500 Subject: [PATCH 01/16] [https://nvbugs/5761665][fix] AutoDeploy: handle bugs for 25.12 dlfw upgrade (#10511) Signed-off-by: Lucas Liebenwein <11156568+lucaslie@users.noreply.github.com> --- .../auto_deploy/_utils_test/_model_test_utils.py | 4 ++-- .../unit/singlegpu/models/test_hybrid_patches.py | 9 ++++----- .../singlegpu/models/test_modeling_nemotron_h.py | 9 ++++----- .../library/test_attention_matcher.py | 16 ++++++++-------- .../library/test_attention_matcher_hf.py | 2 +- .../transformations/library/test_fuse_rmsnorm.py | 2 +- .../library/test_fused_add_rms_norm.py | 5 +++-- .../library/test_gather_logits_before_lm_head.py | 2 +- .../library/test_rope_transformation.py | 4 ++-- .../singlegpu/transformations/test_export.py | 10 +++++----- 10 files changed, 31 insertions(+), 32 deletions(-) diff --git a/tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py b/tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py index af821955d4..5adb8e5a73 100644 --- a/tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py +++ b/tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py @@ -278,8 +278,8 @@ class FakeFP8Linear(nn.Linear): def generate_dynamic_shapes(max_batch_size, max_seq_len): dynamic_shapes = ( { - 0: Dim("batch_size", max=max_batch_size), - 1: Dim("seq_len", max=max_seq_len), + 0: Dim.DYNAMIC, + 1: Dim.DYNAMIC, }, ) return dynamic_shapes diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_hybrid_patches.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_hybrid_patches.py index 6ea5c0efa1..430add5a28 100644 --- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_hybrid_patches.py +++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_hybrid_patches.py @@ -72,12 +72,11 @@ def test_bamba_patches( position_ids = torch.arange(input_ids.shape[1], device=input_ids.device).repeat( input_ids.shape[0], 1 ) + batch_size_dynamic = Dim.DYNAMIC + seq_len_dynamic = Dim.DYNAMIC dynamic_shapes = ( - {0: Dim("batch_size", min=0, max=8), 1: Dim("seq_len", min=0, max=512)}, - { - 0: Dim("batch_size", min=0, max=8), - 1: Dim("seq_len", min=0, max=512), - }, + {0: batch_size_dynamic, 1: seq_len_dynamic}, + {0: batch_size_dynamic, 1: seq_len_dynamic}, ) def _run_torch_export_to_gm(): diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_modeling_nemotron_h.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_modeling_nemotron_h.py index 94b22ed14f..d5d624e721 100644 --- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_modeling_nemotron_h.py +++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/models/test_modeling_nemotron_h.py @@ -184,12 +184,11 @@ def test_custom_model_implementation_can_be_exported( position_ids = torch.arange(input_ids.shape[1], device=input_ids.device).repeat( input_ids.shape[0], 1 ) + batch_size_dynamic = Dim.DYNAMIC + seq_len_dynamic = Dim.DYNAMIC dynamic_shapes = ( - {0: Dim("batch_size", min=0, max=8), 1: Dim("seq_len", min=0, max=512)}, - { - 0: Dim("batch_size", min=0, max=8), - 1: Dim("seq_len", min=0, max=512), - }, + {0: batch_size_dynamic, 1: seq_len_dynamic}, + {0: batch_size_dynamic, 1: seq_len_dynamic}, ) def _run_torch_export_to_gm(): diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher.py 
b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher.py index c3b1830317..40a331025a 100644 --- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher.py +++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher.py @@ -84,7 +84,7 @@ class RepeatKVModel(torch.nn.Module): return output def get_dynamic_shapes(self): - return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)} + return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC} class RepeatKVModel2(RepeatKVModel): @@ -185,7 +185,7 @@ class EagerAttentionModel(torch.nn.Module): return output def get_dynamic_shapes(self): - return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)} + return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC} class ComplexEagerAttentionModel(torch.nn.Module): @@ -274,7 +274,7 @@ class ComplexEagerAttentionModel(torch.nn.Module): return output def get_dynamic_shapes(self): - return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)} + return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC} class CounterExampleModel(torch.nn.Module): @@ -329,7 +329,7 @@ class CounterExampleModel(torch.nn.Module): return features_case1 def get_dynamic_shapes(self): - return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)} + return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC} class GroupedAttentionModel(torch.nn.Module): @@ -403,7 +403,7 @@ class GroupedAttentionModel(torch.nn.Module): return output def get_dynamic_shapes(self): - return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)} + return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC} def _get_match_repeat_kv_optimizer() -> Callable: @@ -907,7 +907,7 @@ class CausalAttentionModel(torch.nn.Module): return output def get_dynamic_shapes(self): - return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)} + return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC} class Llama3CausalAttentionModel(torch.nn.Module): @@ -1013,7 +1013,7 @@ class Llama3CausalAttentionModel(torch.nn.Module): return output def get_dynamic_shapes(self): - return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)} + return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC} class AttentionLayoutModel(torch.nn.Module): @@ -1102,7 +1102,7 @@ class AttentionLayoutModel(torch.nn.Module): return output def get_dynamic_shapes(self): - return {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=4, max=16)} + return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC} class BsndAttentionModel(AttentionLayoutModel): diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher_hf.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher_hf.py index 3ae7775c6a..661f1863ee 100644 --- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher_hf.py +++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_attention_matcher_hf.py @@ -117,7 +117,7 @@ def test_match_llama_attention(config: Dict[str, Any], attn_implementation: str) "attn_implementation": attn_implementation, **config, } - dynamic_shapes = {0: Dim("batch_size", max=8), 1: Dim("seq_len", min=2, max=8)} + dynamic_shapes = {0: Dim.DYNAMIC, 1: Dim.DYNAMIC} # Build and export model on meta device with init_empty_weights(): diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fuse_rmsnorm.py 
b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fuse_rmsnorm.py index fe88a866f5..d354f9d50f 100644 --- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fuse_rmsnorm.py +++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fuse_rmsnorm.py @@ -59,7 +59,7 @@ def _run_test(model, op, variant): return any(is_op(n, op) for n in gm.graph.nodes) x = torch.randn(2, 1024, device="cuda", dtype=torch.float16) - dynamic_shapes = {0: Dim("batch_size", max=8)} + dynamic_shapes = {0: Dim.DYNAMIC} gm = torch_export_to_gm(model, args=(x,), dynamic_shapes=(dynamic_shapes,), clone=True) gm_transformed = InferenceOptimizer( None, diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fused_add_rms_norm.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fused_add_rms_norm.py index 8cfb59756a..6926e980c9 100644 --- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fused_add_rms_norm.py +++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_fused_add_rms_norm.py @@ -37,8 +37,9 @@ def _run_test(model): residual = torch.randn(bsz, seq_len, hidden, device="cuda", dtype=torch.bfloat16) # Dynamic shapes - ds_x = {0: Dim("batch_size", max=8)} - ds_res = {0: Dim("batch_size", max=8)} + dyn_batch_size = Dim.DYNAMIC + ds_x = {0: dyn_batch_size} + ds_res = {0: dyn_batch_size} gm = torch_export_to_gm(model, args=(x, residual), dynamic_shapes=(ds_x, ds_res), clone=True) diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_gather_logits_before_lm_head.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_gather_logits_before_lm_head.py index 42cd57752c..abf5d6e1d5 100644 --- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_gather_logits_before_lm_head.py +++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_gather_logits_before_lm_head.py @@ -189,7 +189,7 @@ class TestGatherLogitsBeforeLmHeadTransform: else: # dynamic_shapes should be a tuple matching the number of positional args dynamic_shapes = ( - {0: Dim("batch_size", min=1, max=max_batch_size)}, # hidden_states + {0: Dim.DYNAMIC}, # hidden_states None, # logit_gather_ids (static) None, # seq_len (static) ) diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_rope_transformation.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_rope_transformation.py index ffa2c0ccd8..291cd377bd 100644 --- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_rope_transformation.py +++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/library/test_rope_transformation.py @@ -135,7 +135,7 @@ class RoPEModel(torch.nn.Module): return out.to(torch.float16) if self.mode == "match" else out def get_dynamic_shapes(self): - return {0: Dim("batch_size", max=8), 1: Dim("seq_len", max=16)} + return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC} @pytest.mark.parametrize( @@ -387,7 +387,7 @@ class DSModel(torch.nn.Module): return torch.cat([q_out, k_out], dim=-1) def get_dynamic_shapes(self): - return {0: Dim("batch_size", max=8), 1: Dim("seq_len", max=16)} + return {0: Dim.DYNAMIC, 1: Dim.DYNAMIC} @pytest.mark.parametrize( diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/test_export.py 
b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/test_export.py index 3c28697f3b..7a5b5e2446 100644 --- a/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/test_export.py +++ b/tests/unittest/_torch/auto_deploy/unit/singlegpu/transformations/test_export.py @@ -47,7 +47,7 @@ class MLPForExport(ModuleForExport): return torch.randn(2, 10) def get_dynamic_shapes(self): - return {0: Dim("batch_size", max=100)} + return {0: Dim.DYNAMIC} class MLPDuplicate(ModuleForExport): @@ -72,7 +72,7 @@ class MLPDuplicate(ModuleForExport): return {"fc3.weight"} def get_dynamic_shapes(self): - return {0: Dim("batch_size", max=100)} + return {0: Dim.DYNAMIC} class ModuleWithWhere(ModuleForExport): @@ -90,7 +90,7 @@ class ModuleWithWhere(ModuleForExport): return torch.randn(2, 10) def get_dynamic_shapes(self): - return {0: Dim("batch_size", max=100)} + return {0: Dim.DYNAMIC} def check_xfail(self, f_export, use_dynamic_shape, device) -> bool: return ( @@ -129,7 +129,7 @@ class ModuleWithRouting(ModuleForExport): return torch.randn(self.seq_len, self.num_experts) def get_dynamic_shapes(self): - return {0: Dim("seq_len", max=100)} + return {0: Dim.DYNAMIC} def check_xfail(self, f_export, use_dynamic_shape, device) -> bool: return ( @@ -153,7 +153,7 @@ class ModuleWithModuleList(ModuleForExport): return torch.randn(2, 10, device=self.fcs[0].weight.device) def get_dynamic_shapes(self): - return {0: Dim("batch_size", max=100)} + return {0: Dim.DYNAMIC} def check_xfail(self, f_export, use_dynamic_shape, device) -> bool: # non-strict mode only works with our hack in torch_export_to_gm From 81f878c2793693a788550e63641ba0b31abf6f97 Mon Sep 17 00:00:00 2001 From: xxi <95731198+xxi-nv@users.noreply.github.com> Date: Thu, 8 Jan 2026 09:17:59 +0800 Subject: [PATCH 02/16] [https://nvbugs/5707392][fix] unwaive test_fused_moe_fp8_blockwise_wide_ep[NotEnabled] (#10428) Signed-off-by: xxi --- tests/integration/test_lists/waives.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index a71d0475c0..f211cdb1fd 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -370,7 +370,6 @@ accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_auto_dtype accuracy/test_llm_api_pytorch_multimodal.py::TestLlava_V1_6_Mistral_7B::test_auto_dtype SKIP (https://nvbugs/5707087) accuracy/test_llm_api_pytorch_multimodal.py::TestPhi4MMFusedVisionLora::test_auto_dtype SKIP (https://nvbugs/5707087) disaggregated/test_disaggregated.py::test_disaggregated_ctxtp2pp2_gentp2pp2[TinyLlama-1.1B-Chat-v1.0] SKIP (https://nvbugs/5705199) -unittest/_torch/modules/test_fused_moe.py::test_fused_moe_fp8_blockwise_wide_ep[NotEnabled] SKIP (https://nvbugs/5707392) accuracy/test_llm_api_pytorch.py::TestLlama3_3NemotronSuper49Bv1::test_auto_dtype_tp2 SKIP (https://nvbugs/5707145) accuracy/test_llm_api_pytorch.py::TestLlama3_3NemotronSuper49Bv1::test_fp8_prequantized_tp2 SKIP (https://nvbugs/5707145) accuracy/test_llm_api_pytorch.py::TestNemotronH_56B_Base::test_auto_dtype[tp8-cuda_graph=True] SKIP (https://nvbugs/5640697) From 09d9878385a7ac52c11491e6981d9f98a326449d Mon Sep 17 00:00:00 2001 From: Yukun He <23156053+hyukn@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:21:02 +0800 Subject: [PATCH 03/16] [TRTLLM-9661][chore] Further reduce tuning time for cuteDSL nvFP4 dense gemm. 
(#10339) Signed-off-by: Yukun He <23156053+hyukn@users.noreply.github.com> --- .../_torch/custom_ops/cute_dsl_custom_ops.py | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/tensorrt_llm/_torch/custom_ops/cute_dsl_custom_ops.py b/tensorrt_llm/_torch/custom_ops/cute_dsl_custom_ops.py index ae61e2b64c..771e7ed7c8 100644 --- a/tensorrt_llm/_torch/custom_ops/cute_dsl_custom_ops.py +++ b/tensorrt_llm/_torch/custom_ops/cute_dsl_custom_ops.py @@ -6,7 +6,7 @@ import torch from tensorrt_llm.logger import logger from ..._utils import get_sm_version -from ...math_utils import pad_up +from ...math_utils import ceil_div, pad_up from ..autotuner import (AutoTuner, ConstraintSpec, DistributedTuningStrategy, DynamicTensorSpec, OptimizationProfile, TunableRunner, TuningConfig) @@ -314,6 +314,16 @@ class GatherGroupedGemmInputsHelper(GroupedGemmInputsHelper): num_non_exiting_tiles, global_sf) +def get_dense_gemm_approximate_cta_nums( + M: int, N: int, tile_mn: Tuple[int, int], + cluster_shape_mn: Tuple[int, int]) -> int: + tile_m, tile_n = tile_mn + cluster_m, cluster_n = cluster_shape_mn + clustered_ctas_m = pad_up(ceil_div(M, tile_m), cluster_m) + clustered_ctas_n = pad_up(ceil_div(N, tile_n), cluster_n) + return clustered_ctas_m * clustered_ctas_n + + if IS_CUTLASS_DSL_AVAILABLE: import cutlass @@ -360,15 +370,6 @@ if IS_CUTLASS_DSL_AVAILABLE: def unique_id(self): return (self.output_dtype, self.to_userbuffers, self.use_tvm_ffi) - def __hash__(self): - return hash( - (self.output_dtype, self.to_userbuffers, self.use_tvm_ffi)) - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return False - return self.output_dtype == other.output_dtype and self.to_userbuffers == other.to_userbuffers and self.use_tvm_ffi == other.use_tvm_ffi - def get_valid_tactics( self, inputs: List[torch.Tensor], @@ -454,6 +455,7 @@ if IS_CUTLASS_DSL_AVAILABLE: (4, 4), ] swap_ab_candidates = [True, False] + # prune: prefetch is beneficial only when K is large enough use_prefetch_candidates = [True, False] valid_tactics = [] @@ -484,6 +486,19 @@ if IS_CUTLASS_DSL_AVAILABLE: b_major, c_major, ): + # Prefetch pruning to save tuning time + cta_nums = get_dense_gemm_approximate_cta_nums( + m, n, mma_tiler_mn, cluster_shape_mn) + cta_wave_ratio = cta_nums / torch.cuda.get_device_properties( + ).multi_processor_count + if use_prefetch and not any(( + # CTA waves ratio between 0.5 and 1.0 + 0.5 < cta_wave_ratio < 1.0, + # K is large enough + real_k >= 8192, + )): + continue + valid_tactics.append( (mma_tiler_mn, cluster_shape_mn, swap_ab, use_prefetch)) From b85c447ceb1ff91c5d4df6b71de2256a5fabfe9d Mon Sep 17 00:00:00 2001 From: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:32:50 +0800 Subject: [PATCH 04/16] [https://nvbugs/5784543][fix] Setup dist before using autotuner. 
(#10491) Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com> --- tests/unittest/_torch/modules/test_fused_moe.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/unittest/_torch/modules/test_fused_moe.py b/tests/unittest/_torch/modules/test_fused_moe.py index 5a0b641f1b..29be45b8fd 100644 --- a/tests/unittest/_torch/modules/test_fused_moe.py +++ b/tests/unittest/_torch/modules/test_fused_moe.py @@ -24,6 +24,7 @@ from utils.util import (check_accuracy, skip_blackwell, skip_blackwell_geforce, skip_pre_hopper) from tensorrt_llm._torch.autotuner import AutoTuner, autotune +from tensorrt_llm._torch.distributed import MPIDist, TorchDist from tensorrt_llm._torch.model_config import ModelConfig from tensorrt_llm._torch.modules.fused_moe.fused_moe_cute_dsl import \ CuteDslFusedMoE @@ -44,7 +45,7 @@ from tensorrt_llm._torch.modules.fused_moe.quantization import \ from tensorrt_llm._torch.modules.fused_moe.fused_moe_triton import \ IS_TRITON_KERNELS_AVAILABLE from tensorrt_llm._torch.modules.gated_mlp import GatedMLP -from tensorrt_llm._utils import get_sm_version, mpi_rank +from tensorrt_llm._utils import get_sm_version, mpi_disabled, mpi_rank from tensorrt_llm.mapping import Mapping from tensorrt_llm.models.modeling_utils import QuantAlgo, QuantConfig @@ -104,6 +105,12 @@ def test_fused_moe(moe_backend, mapping = mapping or Mapping() mapping.rank = mpi_rank() + if mpi_disabled(): + dist = TorchDist(mapping=mapping) + else: + dist = MPIDist(mapping=mapping) + + AutoTuner.get().setup_distributed_state(mapping, dist) torch.cuda.set_device(mapping.rank) From f8b2a8fd30ca22cc464d7192e7bf2cd481d3800e Mon Sep 17 00:00:00 2001 From: yingguo-trt <244492186+yingguo-trt@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:51:36 +0800 Subject: [PATCH 05/16] [None][chore] Support multiple job submission at the same time (#10492) Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> Co-authored-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com> --- tests/integration/defs/perf/disagg/README.md | 145 ++++++++++++- .../integration/defs/perf/disagg/conftest.py | 193 +++++++++++++++++- .../defs/perf/disagg/execution/executor.py | 4 +- .../perf/disagg/execution/subprocess_utils.py | 19 +- .../defs/perf/disagg/test_disagg.py | 39 ++-- 5 files changed, 365 insertions(+), 35 deletions(-) diff --git a/tests/integration/defs/perf/disagg/README.md b/tests/integration/defs/perf/disagg/README.md index 28ba839c6e..5921900b70 100644 --- a/tests/integration/defs/perf/disagg/README.md +++ b/tests/integration/defs/perf/disagg/README.md @@ -132,6 +132,141 @@ poetry run pytest --disagg test_disagg.py -s -vv -m accuracy poetry run pytest --disagg test_disagg.py -s -vv -k "deepseek-r1-fp4_1k1k" ``` +## Batch Job Submission + +The framework supports automatic batch job submission to maximize parallelism in SLURM cluster environments. Instead of submitting jobs one-by-one, it groups test cases into batches and submits entire batches when needed. 
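The batch bookkeeping itself is simple integer arithmetic; the sketch below condenses the `get_job_id` path of the `BatchManager` added to `conftest.py` in this patch (simplified for illustration, with error handling omitted):

```python
# Condensed from BatchManager.get_job_id (conftest.py); error handling omitted.
def get_job_id(self, test_config):
    # Position of this test in pytest collection order.
    idx = next(i for i, c in enumerate(self.all_configs)
               if c.test_id == test_config.test_id)
    # batch_size == None means "unlimited": everything lands in batch 0.
    batch_num = idx // self.batch_size if self.batch_size else 0
    # The first test of a batch triggers submission of the whole batch.
    if batch_num not in self.submitted_batches:
        self._submit_batch(batch_num)
    # job_ids for every test in the batch were cached at submission time.
    return self.job_mapping.get(test_config.test_id)
```

Because job IDs are cached per batch, only the first test of each batch pays the submission cost; the rest go straight to waiting on their already-running SLURM jobs.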
+ +### Quick Start + +**Default batch size (5 jobs per batch):** +```bash +# Run all tests with default batching +poetry run pytest --disagg test_disagg.py -s -vv + +# Run with test list +poetry run pytest --disagg test_disagg.py -s -vv --disagg-test-list=./testlist/all.txt +``` + +**Custom batch size:** +```bash +# Set batch size via command line +poetry run pytest --disagg test_disagg.py -s -vv --disagg-batch-size=10 + +# Set batch size via environment variable +export DISAGG_BATCH_SIZE=20 +poetry run pytest --disagg test_disagg.py -s -vv + +# Submit all jobs at once (unlimited batch) +poetry run pytest --disagg test_disagg.py -s -vv --disagg-batch-size=0 +``` + +### How Batch Submission Works + +``` +Pytest Collection Phase: + - Collects all test cases (e.g., 100 tests) + - BatchManager splits them into batches (e.g., 20 batches of 5) + +Pytest Execution Phase: + Test 0 runs: + -> Triggers submission of Batch 0 (jobs 0-4) + -> Waits for job 0 to complete + + Test 1-4 run: + -> Batch 0 already submitted, directly wait for completion + + Test 5 runs: + -> Triggers submission of Batch 1 (jobs 5-9) + -> Waits for job 5 to complete + + ... and so on +``` + +### Key Benefits + +- **Parallel Execution**: All jobs in a batch run simultaneously on SLURM cluster +- **Reduced Wait Time**: Total time ≈ MAX(job time) instead of SUM(job times) +- **Automatic Management**: No need to manually split test lists +- **Lazy Loading**: Only submits batches when needed + +### Configuration Options + +**Priority**: Command line option > Environment variable > Default (5) + +**Examples:** + +```bash +# Small batch for quick testing +poetry run pytest --disagg test_disagg.py -s -vv --disagg-batch-size=3 \ + --disagg-test-list=./testlist/debug.txt + +# Large batch for production +poetry run pytest --disagg test_disagg.py -s -vv --disagg-batch-size=50 \ + --disagg-test-list=./testlist/all.txt + +# Submit all at once +poetry run pytest --disagg test_disagg.py -s -vv --disagg-batch-size=0 +``` + +### Timeout Configuration + +The default timeout for waiting for job completion is **10 hours (36000 seconds)**, which accounts for: +- SLURM queue wait time +- Job execution time +- Buffer for delays + +### Performance Comparison + +**Before (Sequential Submission):** +``` +Case 1: submit + wait (1.5h) = 1.5h +Case 2: submit + wait (1.5h) = 1.5h +Case 3: submit + wait (1.5h) = 1.5h +... +Total: 50 × 1.5h = 75 hours +``` + +**After (Batch Submission, batch_size=50):** +``` +Batch 0 (50 jobs): submitted in parallel + Case 1: wait (1.5h) + Case 2-50: wait (0s, already done) + +Total: ~1.5 hours +``` + +**Speedup: 50x** + +### Troubleshooting + +**Check BatchManager initialization:** +``` +====================================================================== +Batch Manager Initialized +Batch size: 5 jobs per batch +====================================================================== + +Total test configs: 20 +Total batches: 4 +``` + +**Monitor batch submission:** +``` +====================================================================== +Submitting Batch 0 +Range: [0:5] (5 jobs) +====================================================================== + + [ 1/5] Job 1234 <- test_config_id_1 + [ 2/5] Job 1235 <- test_config_id_2 + ... 
+``` + +**If jobs timeout frequently:** +- Check SLURM queue status +- Consider reducing batch size to avoid resource contention +- Verify that timeout (36000s) is sufficient for your workload + ## Test Naming Convention Tests are automatically named using the format: @@ -193,6 +328,7 @@ Test results are saved to: - `GPU_TYPE`: Current GPU type (default: GB200) - `OUTPUT_PATH`: Directory for test results and logs - `WORK_DIR`: Working directory for benchmark execution +- `DISAGG_BATCH_SIZE`: Default batch size for job submission (default: 5) - `DEBUG_MODE`: Enable debug mode (set to "1" to skip job submission) - `DEBUG_JOB_ID`: Job ID to use in debug mode @@ -212,10 +348,11 @@ The framework consists of: 1. **ConfigLoader**: Scans and loads YAML configurations 2. **ConfigValidator**: Validates configuration correctness -3. **JobManager**: Handles SLURM job submission and monitoring -4. **LogParser**: Extracts metrics from benchmark logs -5. **TestCaseTracker**: Tracks test execution timing -6. **ResultSaver**: Saves results to CSV +3. **BatchManager**: Manages batch job submission for parallel execution +4. **JobManager**: Handles SLURM job submission and monitoring +5. **LogParser**: Extracts metrics from benchmark logs +6. **TestCaseTracker**: Tracks test execution timing +7. **ResultSaver**: Saves results to CSV ## Benefits diff --git a/tests/integration/defs/perf/disagg/conftest.py b/tests/integration/defs/perf/disagg/conftest.py index 2dabeda1cd..a4b88542df 100644 --- a/tests/integration/defs/perf/disagg/conftest.py +++ b/tests/integration/defs/perf/disagg/conftest.py @@ -1,9 +1,11 @@ """Pytest configuration for disagg tests. Only collects tests in this directory when --disagg parameter is provided. -Can share options like --disagg-test-list defined in this conftest.py. +Provides batch job submission capability to improve parallelism. """ +import os + import pytest from utils.logger import logger @@ -23,6 +25,15 @@ def pytest_addoption(parser): help="Path to a file containing test IDs (one per line) to run. " "Example: pytest --disagg --disagg-test-list=testlist/testlist_gb200.txt", ) + parser.addoption( + "--disagg-batch-size", + action="store", + type=int, + default=None, + help="Number of jobs to submit per batch. Default: from env DISAGG_BATCH_SIZE or 5. " + "Set to 0 for unlimited (submit all at once). " + "Example: pytest --disagg --disagg-batch-size=10", + ) def pytest_collect_directory(path, parent): @@ -45,7 +56,6 @@ def pytest_collect_directory(path, parent): return True # With --disagg parameter, proceed with normal collection - # Can subsequently use --disagg-test-list and other options from main conftest.py for filtering return None @@ -88,7 +98,7 @@ def pytest_collection_modifyitems(config, items): for item in items: # item.nodeid is the full test identifier like: - # "test_disagg_simple.py::TestDisaggBenchmark::test_benchmark[deepseek-r1-fp4:1k1k:...]" + # "test_disagg.py::TestDisaggBenchmark::test_benchmark[deepseek-r1-fp4:1k1k:...]" if item.nodeid in wanted_tests: selected.append(item) else: @@ -112,3 +122,180 @@ def pytest_collection_modifyitems(config, items): logger.warning(f"Please check that the test IDs in {test_list_file} are correct.") logger.info(f"{'=' * 70}\n") + + +class BatchManager: + """Batch job submission manager for disagg tests. + + Automatically splits test cases into batches and submits them on-demand + to maximize parallelism in SLURM cluster environments. 
+ + Key features: + - Lazy batch submission: only submits when needed + - Configurable batch size via CLI or environment variable + - Maintains job_id mapping for all submitted jobs + """ + + def __init__(self, batch_size=5): + """Initialize batch manager. + + Args: + batch_size: Number of jobs per batch. None or 0 means unlimited (submit all at once). + Default is 5 if not specified. + """ + # Normalize batch_size: None, 0, or negative means unlimited + if batch_size is None or batch_size <= 0: + self.batch_size = None + else: + self.batch_size = batch_size + + self.submitted_batches = set() # Track which batch numbers have been submitted + self.job_mapping = {} # Map test_id -> SLURM job_id + self.all_configs = [] # Ordered list of all test configs + + logger.info(f"\n{'=' * 70}") + logger.info("Batch Manager Initialized") + if self.batch_size: + logger.info(f"Batch size: {self.batch_size} jobs per batch") + else: + logger.info("Batch size: unlimited (submit all at once)") + logger.info(f"{'=' * 70}\n") + + def add_config(self, test_config): + """Add a test configuration to the manager. + + Called during initialization to build the ordered list of configs. + + Args: + test_config: TestConfig object to add + """ + self.all_configs.append(test_config) + + def get_job_id(self, test_config): + """Get SLURM job ID for a test config, submitting batch if needed. + + This is the main entry point. It: + 1. Determines which batch the test belongs to + 2. Submits the entire batch if not already submitted + 3. Returns the job_id for this specific test + + Args: + test_config: TestConfig object to get job_id for + + Returns: + str: SLURM job ID, or None if submission failed + """ + # Find the index of this config in the ordered list + try: + idx = next( + i for i, c in enumerate(self.all_configs) if c.test_id == test_config.test_id + ) + except StopIteration: + logger.error(f"Config not found in manager: {test_config.test_id}") + return None + + # Calculate which batch this test belongs to + if self.batch_size: + batch_num = idx // self.batch_size + else: + batch_num = 0 # All tests in one batch + + # Submit the batch if not already submitted + if batch_num not in self.submitted_batches: + self._submit_batch(batch_num) + + # Return the cached job_id + return self.job_mapping.get(test_config.test_id) + + def _submit_batch(self, batch_num): + """Submit all jobs in a specific batch. + + Args: + batch_num: Batch number to submit (0-indexed) + """ + from execution.executor import JobManager + + # Calculate batch range + if self.batch_size: + start_idx = batch_num * self.batch_size + end_idx = min(start_idx + self.batch_size, len(self.all_configs)) + else: + start_idx = 0 + end_idx = len(self.all_configs) + + batch_configs = self.all_configs[start_idx:end_idx] + + logger.info(f"\n{'=' * 70}") + logger.info(f"Submitting Batch {batch_num}") + logger.info(f"Range: [{start_idx}:{end_idx}] ({len(batch_configs)} jobs)") + logger.info(f"{'=' * 70}\n") + + # Submit all jobs in this batch + success_count = 0 + for i, config in enumerate(batch_configs, 1): + try: + success, job_id = JobManager.submit_test_job(config) + if success and job_id: + self.job_mapping[config.test_id] = job_id + success_count += 1 + # Truncate test_id for display + display_id = ( + config.test_id[:60] + "..." 
if len(config.test_id) > 60 else config.test_id + ) + logger.success(f" [{i:3d}/{len(batch_configs)}] Job {job_id} <- {display_id}") + else: + self.job_mapping[config.test_id] = None + logger.error(f" [{i:3d}/{len(batch_configs)}] Failed: {config.test_id[:50]}") + except Exception as e: + self.job_mapping[config.test_id] = None + logger.error(f" [{i:3d}/{len(batch_configs)}] Error: {e}") + + # Mark batch as submitted + self.submitted_batches.add(batch_num) + + logger.info(f"\n{'=' * 70}") + logger.success( + f"Batch {batch_num} Complete: {success_count}/{len(batch_configs)} succeeded" + ) + logger.info(f"{'=' * 70}\n") + + +@pytest.fixture(scope="session") +def batch_manager(request): + """Provide batch manager fixture for test methods. + + This session-scoped fixture creates and initializes the BatchManager + with all collected test configs. + + Returns: + BatchManager: Initialized batch manager instance + """ + # Get batch size from CLI option or environment variable + batch_size = request.config.getoption("--disagg-batch-size") + if batch_size is None: + env_batch_size = os.getenv("DISAGG_BATCH_SIZE") + if env_batch_size: + try: + batch_size = int(env_batch_size) + except ValueError: + logger.warning(f"Invalid DISAGG_BATCH_SIZE: {env_batch_size}, using default 5") + batch_size = 5 + else: + batch_size = 5 # Default batch size + + # Create batch manager + manager = BatchManager(batch_size=batch_size) + + # Extract all test configs from collected items + for item in request.session.items: + if hasattr(item, "callspec") and "test_config" in item.callspec.params: + manager.add_config(item.callspec.params["test_config"]) + + # Log statistics + logger.info(f"Total test configs: {len(manager.all_configs)}") + if manager.batch_size: + total_batches = (len(manager.all_configs) + manager.batch_size - 1) // manager.batch_size + logger.info(f"Total batches: {total_batches}") + logger.info("") + + return manager diff --git a/tests/integration/defs/perf/disagg/execution/executor.py b/tests/integration/defs/perf/disagg/execution/executor.py index d454765c53..547b63aa8c 100644 --- a/tests/integration/defs/perf/disagg/execution/executor.py +++ b/tests/integration/defs/perf/disagg/execution/executor.py @@ -114,7 +114,9 @@ class JobManager: logger.debug(f"Script: {script_path}") logger.debug(f"Log file: {output_log_file}") - output = exec_cmd_with_output(sbatch_args, timeout=60) + # Use check=False to allow submission even with Kerberos warnings + # (mimics submit.py behavior) + output = exec_cmd_with_output(sbatch_args, timeout=60, check=False) job_id = output.strip() # Parse job ID (--parsable returns just the job ID) diff --git a/tests/integration/defs/perf/disagg/execution/subprocess_utils.py b/tests/integration/defs/perf/disagg/execution/subprocess_utils.py index 9ab7771426..39a3f0ac4b 100644 --- a/tests/integration/defs/perf/disagg/execution/subprocess_utils.py +++ b/tests/integration/defs/perf/disagg/execution/subprocess_utils.py @@ -33,19 +33,22 @@ def exec_cmd(*popenargs, timeout: Optional[float] = None, **kwargs) -> int: return result.returncode -def exec_cmd_with_output(*popenargs, timeout: Optional[float] = None, **kwargs) -> str: +def exec_cmd_with_output( + *popenargs, timeout: Optional[float] = None, check: bool = True, **kwargs +) -> str: """Execute command and return output as string. 
Args: *popenargs: Command and arguments timeout: Timeout in seconds + check: If True, raise CalledProcessError on non-zero exit code (default: True) **kwargs: Additional subprocess arguments Returns: stdout as string (decoded from bytes) Raises: - subprocess.CalledProcessError: If command returns non-zero exit code + subprocess.CalledProcessError: If check=True and command returns non-zero exit code subprocess.TimeoutExpired: If timeout is reached """ result = subprocess.run( @@ -53,11 +56,15 @@ def exec_cmd_with_output(*popenargs, timeout: Optional[float] = None, **kwargs) stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=timeout, - check=True, + check=check, **kwargs, ) - # Log stderr if it exists + # Log stderr if it exists (as warning if check=False, as error if check=True) if result.stderr: - stderr_output = result.stderr.decode() - logger.error(f"Command stderr: {stderr_output}") + stderr_output = result.stderr.decode().strip() + if stderr_output: + if check: + logger.error(f"Command stderr: {stderr_output}") + else: + logger.warning(f"Command stderr: {stderr_output}") return result.stdout.decode() diff --git a/tests/integration/defs/perf/disagg/test_disagg.py b/tests/integration/defs/perf/disagg/test_disagg.py index 39008ca11a..b60ba85196 100644 --- a/tests/integration/defs/perf/disagg/test_disagg.py +++ b/tests/integration/defs/perf/disagg/test_disagg.py @@ -62,7 +62,7 @@ class TestDisaggBenchmark: @pytest.mark.perf @pytest.mark.parametrize("test_config", PERF_TEST_CASES) - def test_benchmark(self, request, test_config: TestConfig): + def test_benchmark(self, request, batch_manager, test_config: TestConfig): """Performance benchmark test for YAML configurations.""" full_test_name = request.node.name @@ -101,15 +101,14 @@ class TestDisaggBenchmark: ) job_id = EnvManager.get_debug_job_id() else: - # Submit job using JobManager - success, job_id = JobManager.submit_test_job(test_config) + # Get job_id from batch manager (auto-submits batch if needed) + job_id = batch_manager.get_job_id(test_config) # Validate submission result - assert success, f"Job submission failed: {test_config.test_id}" - assert job_id, "Unable to get job ID" + assert job_id, f"Failed to get job_id for {test_config.test_id}" - # Wait for completion (timeout/early failure handled inside) - JobManager.wait_for_completion(job_id, 7200, test_config, check_early_failure=True) + # Wait for completion (timeout: 10 hours = 36000 seconds) + JobManager.wait_for_completion(job_id, 36000, test_config, check_early_failure=True) # End tracking test case test_tracker.end_test_case() @@ -136,7 +135,7 @@ class TestDisaggBenchmark: @pytest.mark.accuracy @pytest.mark.parametrize("test_config", ACCURACY_TEST_CASES) - def test_accuracy(self, request, test_config: TestConfig): + def test_accuracy(self, request, batch_manager, test_config: TestConfig): """Accuracy test for YAML configurations.""" full_test_name = request.node.name @@ -179,15 +178,14 @@ class TestDisaggBenchmark: ) job_id = EnvManager.get_debug_job_id() else: - # Submit job using JobManager - success, job_id = JobManager.submit_test_job(test_config) + # Get job_id from batch manager (auto-submits batch if needed) + job_id = batch_manager.get_job_id(test_config) # Validate submission result - assert success, f"Job submission failed: {test_config.test_id}" - assert job_id, "Unable to get job ID" + assert job_id, f"Failed to get job_id for {test_config.test_id}" - # Wait for completion (timeout/early failure handled inside) - JobManager.wait_for_completion(job_id, 
10800, test_config, check_early_failure=True) + # Wait for completion (timeout: 10 hours = 36000 seconds) + JobManager.wait_for_completion(job_id, 36000, test_config, check_early_failure=True) # End tracking test case test_tracker.end_test_case() @@ -216,7 +214,7 @@ class TestDisaggBenchmark: @pytest.mark.stress @pytest.mark.parametrize("test_config", STRESS_TEST_CASES) - def test_stress(self, request, test_config: TestConfig): + def test_stress(self, request, batch_manager, test_config: TestConfig): """Stress test combining performance benchmarks and accuracy validation. This test type is designed for stress testing scenarios where both @@ -265,15 +263,14 @@ class TestDisaggBenchmark: ) job_id = EnvManager.get_debug_job_id() else: - # Submit job using JobManager - success, job_id = JobManager.submit_test_job(test_config) + # Get job_id from batch manager (auto-submits batch if needed) + job_id = batch_manager.get_job_id(test_config) # Validate submission result - assert success, f"Job submission failed: {test_config.test_id}" - assert job_id, "Unable to get job ID" + assert job_id, f"Failed to get job_id for {test_config.test_id}" - # Wait for completion (longer timeout for stress tests: 4 hours) - JobManager.wait_for_completion(job_id, 10800, test_config, check_early_failure=True) + # Wait for completion (timeout: 10 hours = 36000 seconds) + JobManager.wait_for_completion(job_id, 36000, test_config, check_early_failure=True) # End tracking test case test_tracker.end_test_case() From 342a47bf47b298d5eedc95d93a2923b697c50688 Mon Sep 17 00:00:00 2001 From: TensorRT LLM <90828364+tensorrt-cicd@users.noreply.github.com> Date: Thu, 8 Jan 2026 03:12:25 +0000 Subject: [PATCH 06/16] [None][infra] Check in most recent lock file from nightly pipeline Signed-off-by: TensorRT LLM <90828364+tensorrt-cicd@users.noreply.github.com> --- security_scanning/docs/poetry.lock | 6 ++--- .../examples/auto_deploy/poetry.lock | 6 ++--- .../examples/draft_target_model/poetry.lock | 6 ++--- security_scanning/examples/eagle/poetry.lock | 6 ++--- .../llm-eval/lm-eval-harness/poetry.lock | 6 ++--- .../examples/lookahead/poetry.lock | 6 ++--- security_scanning/examples/medusa/poetry.lock | 6 ++--- .../models/contrib/baichuan/poetry.lock | 20 ++++++++-------- .../examples/models/contrib/bloom/poetry.lock | 6 ++--- .../models/contrib/chatglm-6b/poetry.lock | 6 ++--- .../models/contrib/chatglm2-6b/poetry.lock | 6 ++--- .../contrib/chatglm3-6b-32k/poetry.lock | 6 ++--- .../examples/models/contrib/dbrx/poetry.lock | 6 ++--- .../models/contrib/deepseek_v1/poetry.lock | 6 ++--- .../models/contrib/deepseek_v2/poetry.lock | 6 ++--- .../models/contrib/falcon/poetry.lock | 6 ++--- .../examples/models/contrib/gptj/poetry.lock | 6 ++--- .../models/contrib/gptneox/poetry.lock | 6 ++--- .../examples/models/contrib/grok/poetry.lock | 6 ++--- .../models/contrib/internlm/poetry.lock | 6 ++--- .../examples/models/contrib/jais/poetry.lock | 6 ++--- .../examples/models/contrib/mmdit/poetry.lock | 6 ++--- .../examples/models/contrib/mpt/poetry.lock | 6 ++--- .../examples/models/contrib/opt/poetry.lock | 6 ++--- .../models/contrib/skywork/poetry.lock | 6 ++--- .../examples/models/contrib/smaug/poetry.lock | 6 ++--- .../examples/models/contrib/stdit/poetry.lock | 16 ++++++------- .../examples/models/core/commandr/poetry.lock | 6 ++--- .../examples/models/core/gemma/poetry.lock | 6 ++--- .../examples/models/core/glm-4-9b/poetry.lock | 6 ++--- .../examples/models/core/gpt/poetry.lock | 6 ++--- .../examples/models/core/llama/poetry.lock | 6 
++--- .../examples/models/core/mamba/poetry.lock | 6 ++--- .../examples/models/core/mixtral/poetry.lock | 6 ++--- .../examples/models/core/mllama/poetry.lock | 6 ++--- .../examples/models/core/nemotron/poetry.lock | 6 ++--- .../examples/models/core/phi/poetry.lock | 6 ++--- .../examples/models/core/qwen/poetry.lock | 6 ++--- .../models/core/qwen2audio/poetry.lock | 6 ++--- .../examples/models/core/qwenvl/poetry.lock | 20 ++++++++-------- .../models/core/recurrentgemma/poetry.lock | 6 ++--- .../examples/models/core/whisper/poetry.lock | 6 ++--- security_scanning/examples/ngram/poetry.lock | 6 ++--- .../examples/quantization/poetry.lock | 20 ++++++++-------- .../examples/ray_orchestrator/poetry.lock | 24 +++++++++---------- .../examples/redrafter/poetry.lock | 6 ++--- security_scanning/examples/serve/poetry.lock | 6 ++--- .../examples/trtllm-eval/poetry.lock | 6 ++--- security_scanning/metadata.json | 4 ++-- security_scanning/poetry.lock | 24 +++++++++---------- security_scanning/pyproject.toml | 4 ++-- security_scanning/triton_backend/poetry.lock | 6 ++--- 52 files changed, 198 insertions(+), 198 deletions(-) diff --git a/security_scanning/docs/poetry.lock b/security_scanning/docs/poetry.lock index 86ab05727b..f012c4737f 100644 --- a/security_scanning/docs/poetry.lock +++ b/security_scanning/docs/poetry.lock @@ -1195,13 +1195,13 @@ typing-extensions = ">=4.12.0" [[package]] name = "urllib3" -version = "2.6.2" +version = "2.6.3" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, - {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, + {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, + {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, ] [package.extras] diff --git a/security_scanning/examples/auto_deploy/poetry.lock b/security_scanning/examples/auto_deploy/poetry.lock index e4e3db4c5d..607bf79e0c 100644 --- a/security_scanning/examples/auto_deploy/poetry.lock +++ b/security_scanning/examples/auto_deploy/poetry.lock @@ -3635,13 +3635,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.2" +version = "2.6.3" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, - {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, + {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, + {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, ] [package.extras] diff --git a/security_scanning/examples/draft_target_model/poetry.lock b/security_scanning/examples/draft_target_model/poetry.lock index fa46e1e5db..9175f8b9ec 100644 --- a/security_scanning/examples/draft_target_model/poetry.lock +++ b/security_scanning/examples/draft_target_model/poetry.lock @@ -1831,13 +1831,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.2" +version = "2.6.3" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, - {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, + {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, + {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, ] [package.extras] diff --git a/security_scanning/examples/eagle/poetry.lock b/security_scanning/examples/eagle/poetry.lock index 1a7b36af59..51badcabe3 100644 --- a/security_scanning/examples/eagle/poetry.lock +++ b/security_scanning/examples/eagle/poetry.lock @@ -1807,13 +1807,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.2" +version = "2.6.3" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, - {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, + {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, + {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, ] [package.extras] diff --git a/security_scanning/examples/llm-eval/lm-eval-harness/poetry.lock b/security_scanning/examples/llm-eval/lm-eval-harness/poetry.lock index a147921560..bd679a1237 100644 --- a/security_scanning/examples/llm-eval/lm-eval-harness/poetry.lock +++ b/security_scanning/examples/llm-eval/lm-eval-harness/poetry.lock @@ -3273,13 +3273,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.2" +version = "2.6.3" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, - {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, + {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, + {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, ] [package.extras] diff --git a/security_scanning/examples/lookahead/poetry.lock b/security_scanning/examples/lookahead/poetry.lock index fa46e1e5db..9175f8b9ec 100644 --- a/security_scanning/examples/lookahead/poetry.lock +++ b/security_scanning/examples/lookahead/poetry.lock @@ -1831,13 +1831,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.2" +version = "2.6.3" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, - {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, + {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, + {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, ] [package.extras] diff --git a/security_scanning/examples/medusa/poetry.lock b/security_scanning/examples/medusa/poetry.lock index fa46e1e5db..9175f8b9ec 100644 --- a/security_scanning/examples/medusa/poetry.lock +++ b/security_scanning/examples/medusa/poetry.lock @@ -1831,13 +1831,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.2" +version = "2.6.3" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, - {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, + {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, + {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, ] [package.extras] diff --git a/security_scanning/examples/models/contrib/baichuan/poetry.lock b/security_scanning/examples/models/contrib/baichuan/poetry.lock index 2977c6aab1..533aa08614 100644 --- a/security_scanning/examples/models/contrib/baichuan/poetry.lock +++ b/security_scanning/examples/models/contrib/baichuan/poetry.lock @@ -1888,13 +1888,13 @@ telegram = ["requests"] [[package]] name = "transformers" -version = "5.0.0rc1" +version = "5.0.0rc2" description = "Transformers: the model-definition framework for state-of-the-art machine learning models in text, vision, audio, and multimodal models, for both inference and training." 
optional = false python-versions = ">=3.10.0" files = [ - {file = "transformers-5.0.0rc1-py3-none-any.whl", hash = "sha256:8b9604700769872cab4280dbcde201f557e93f72ee5a85c4592275ab4f15d330"}, - {file = "transformers-5.0.0rc1.tar.gz", hash = "sha256:1fdde557b96ef8ea277c45b8e0d558f1e167fe28a98593f4c4aec0277e335821"}, + {file = "transformers-5.0.0rc2-py3-none-any.whl", hash = "sha256:f8f2a14060ab11f20a0eec39d827af54c1589c327c5799d82808ae3f4167418a"}, + {file = "transformers-5.0.0rc2.tar.gz", hash = "sha256:9f2fa5e132433dd7eb910dc224b32de0baf758f3b6ffc918dbb632e0af85c07a"}, ] [package.dependencies] @@ -1912,15 +1912,15 @@ typer-slim = "*" [package.extras] accelerate = ["accelerate (>=1.1.0)"] -all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "codecarbon (>=2.8.1)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"] +all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "codecarbon (>=2.8.1)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"] audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] benchmark = ["optimum-benchmark (>=0.3.0)"] chat-template = ["jinja2 (>=3.1.0)", "jmespath (>=1.0.1)"] codecarbon = ["codecarbon (>=2.8.1)"] deepspeed = ["accelerate (>=1.1.0)", "deepspeed (>=0.9.3)"] deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "optuna", "parameterized (>=0.9)", "protobuf", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"] -dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", 
"optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] -dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] +dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff 
(==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] +dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] ftfy = ["ftfy"] hf-xet = ["hf_xet"] hub-kernels = ["kernels (>=0.10.2,<0.11)"] @@ -1943,7 +1943,7 @@ sklearn = ["scikit-learn"] speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] testing = ["GitPython (<3.1.19)", "accelerate (>=1.1.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "parameterized (>=0.9)", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"] tiktoken = ["blobfile", "tiktoken"] -timm = ["timm (!=1.0.18,<=1.0.19)"] +timm = ["timm (>=1.0.23)"] tokenizers = ["tokenizers (>=0.22.0,<=0.23.0)"] torch = ["accelerate (>=1.1.0)", "torch (>=2.2)"] torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] @@ -2007,13 +2007,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.2" +version = "2.6.3" description = "HTTP library with thread-safe connection pooling, file post, and more." 
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/bloom/poetry.lock b/security_scanning/examples/models/contrib/bloom/poetry.lock
index fa46e1e5db..9175f8b9ec 100644
--- a/security_scanning/examples/models/contrib/bloom/poetry.lock
+++ b/security_scanning/examples/models/contrib/bloom/poetry.lock
@@ -1831,13 +1831,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/chatglm-6b/poetry.lock b/security_scanning/examples/models/contrib/chatglm-6b/poetry.lock
index 4bcc69beb5..4c278b6e73 100644
--- a/security_scanning/examples/models/contrib/chatglm-6b/poetry.lock
+++ b/security_scanning/examples/models/contrib/chatglm-6b/poetry.lock
@@ -1923,13 +1923,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/chatglm2-6b/poetry.lock b/security_scanning/examples/models/contrib/chatglm2-6b/poetry.lock
index 4bcc69beb5..4c278b6e73 100644
--- a/security_scanning/examples/models/contrib/chatglm2-6b/poetry.lock
+++ b/security_scanning/examples/models/contrib/chatglm2-6b/poetry.lock
@@ -1923,13 +1923,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/chatglm3-6b-32k/poetry.lock b/security_scanning/examples/models/contrib/chatglm3-6b-32k/poetry.lock
index 4bcc69beb5..4c278b6e73 100644
--- a/security_scanning/examples/models/contrib/chatglm3-6b-32k/poetry.lock
+++ b/security_scanning/examples/models/contrib/chatglm3-6b-32k/poetry.lock
@@ -1923,13 +1923,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/dbrx/poetry.lock b/security_scanning/examples/models/contrib/dbrx/poetry.lock
index ad71da46bb..4da25d198e 100644
--- a/security_scanning/examples/models/contrib/dbrx/poetry.lock
+++ b/security_scanning/examples/models/contrib/dbrx/poetry.lock
@@ -1805,13 +1805,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/deepseek_v1/poetry.lock b/security_scanning/examples/models/contrib/deepseek_v1/poetry.lock
index a9fec7edd0..350b5d05f7 100644
--- a/security_scanning/examples/models/contrib/deepseek_v1/poetry.lock
+++ b/security_scanning/examples/models/contrib/deepseek_v1/poetry.lock
@@ -1753,13 +1753,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/deepseek_v2/poetry.lock b/security_scanning/examples/models/contrib/deepseek_v2/poetry.lock
index 00f49a484b..b5ea1f3c93 100644
--- a/security_scanning/examples/models/contrib/deepseek_v2/poetry.lock
+++ b/security_scanning/examples/models/contrib/deepseek_v2/poetry.lock
@@ -1753,13 +1753,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/falcon/poetry.lock b/security_scanning/examples/models/contrib/falcon/poetry.lock
index 734d8430c2..4887acc3b2 100644
--- a/security_scanning/examples/models/contrib/falcon/poetry.lock
+++ b/security_scanning/examples/models/contrib/falcon/poetry.lock
@@ -1874,13 +1874,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/gptj/poetry.lock b/security_scanning/examples/models/contrib/gptj/poetry.lock
index a9fec7edd0..350b5d05f7 100644
--- a/security_scanning/examples/models/contrib/gptj/poetry.lock
+++ b/security_scanning/examples/models/contrib/gptj/poetry.lock
@@ -1753,13 +1753,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/gptneox/poetry.lock b/security_scanning/examples/models/contrib/gptneox/poetry.lock
index a9fec7edd0..350b5d05f7 100644
--- a/security_scanning/examples/models/contrib/gptneox/poetry.lock
+++ b/security_scanning/examples/models/contrib/gptneox/poetry.lock
@@ -1753,13 +1753,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/grok/poetry.lock b/security_scanning/examples/models/contrib/grok/poetry.lock
index 9c2dd8c4ee..abb8c091c6 100644
--- a/security_scanning/examples/models/contrib/grok/poetry.lock
+++ b/security_scanning/examples/models/contrib/grok/poetry.lock
@@ -2718,13 +2718,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/internlm/poetry.lock b/security_scanning/examples/models/contrib/internlm/poetry.lock
index fa46e1e5db..9175f8b9ec 100644
--- a/security_scanning/examples/models/contrib/internlm/poetry.lock
+++ b/security_scanning/examples/models/contrib/internlm/poetry.lock
@@ -1831,13 +1831,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/jais/poetry.lock b/security_scanning/examples/models/contrib/jais/poetry.lock
index fa46e1e5db..9175f8b9ec 100644
--- a/security_scanning/examples/models/contrib/jais/poetry.lock
+++ b/security_scanning/examples/models/contrib/jais/poetry.lock
@@ -1831,13 +1831,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/mmdit/poetry.lock b/security_scanning/examples/models/contrib/mmdit/poetry.lock
index 9f2df3b29d..ecf4a89bcc 100644
--- a/security_scanning/examples/models/contrib/mmdit/poetry.lock
+++ b/security_scanning/examples/models/contrib/mmdit/poetry.lock
@@ -1036,13 +1036,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/mpt/poetry.lock b/security_scanning/examples/models/contrib/mpt/poetry.lock
index a9fec7edd0..350b5d05f7 100644
--- a/security_scanning/examples/models/contrib/mpt/poetry.lock
+++ b/security_scanning/examples/models/contrib/mpt/poetry.lock
@@ -1753,13 +1753,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/opt/poetry.lock b/security_scanning/examples/models/contrib/opt/poetry.lock
index a9fec7edd0..350b5d05f7 100644
--- a/security_scanning/examples/models/contrib/opt/poetry.lock
+++ b/security_scanning/examples/models/contrib/opt/poetry.lock
@@ -1753,13 +1753,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/skywork/poetry.lock b/security_scanning/examples/models/contrib/skywork/poetry.lock
index fa46e1e5db..9175f8b9ec 100644
--- a/security_scanning/examples/models/contrib/skywork/poetry.lock
+++ b/security_scanning/examples/models/contrib/skywork/poetry.lock
@@ -1831,13 +1831,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/smaug/poetry.lock b/security_scanning/examples/models/contrib/smaug/poetry.lock
index fa46e1e5db..9175f8b9ec 100644
--- a/security_scanning/examples/models/contrib/smaug/poetry.lock
+++ b/security_scanning/examples/models/contrib/smaug/poetry.lock
@@ -1831,13 +1831,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/contrib/stdit/poetry.lock b/security_scanning/examples/models/contrib/stdit/poetry.lock
index be2df63bc7..e951097a08 100644
--- a/security_scanning/examples/models/contrib/stdit/poetry.lock
+++ b/security_scanning/examples/models/contrib/stdit/poetry.lock
@@ -784,20 +784,20 @@ i18n = ["Babel (>=2.7)"]
 
 [[package]]
 name = "jsonschema"
-version = "4.25.1"
+version = "4.26.0"
 description = "An implementation of JSON Schema validation for Python"
 optional = false
-python-versions = ">=3.9"
+python-versions = ">=3.10"
 files = [
-    {file = "jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63"},
-    {file = "jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85"},
+    {file = "jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce"},
+    {file = "jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326"},
 ]
 
 [package.dependencies]
 attrs = ">=22.2.0"
 jsonschema-specifications = ">=2023.03.6"
 referencing = ">=0.28.4"
-rpds-py = ">=0.7.1"
+rpds-py = ">=0.25.0"
 
 [package.extras]
 format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
@@ -2202,13 +2202,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/commandr/poetry.lock b/security_scanning/examples/models/core/commandr/poetry.lock
index a9fec7edd0..350b5d05f7 100644
--- a/security_scanning/examples/models/core/commandr/poetry.lock
+++ b/security_scanning/examples/models/core/commandr/poetry.lock
@@ -1753,13 +1753,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/gemma/poetry.lock b/security_scanning/examples/models/core/gemma/poetry.lock
index 1df2fa84af..f939cd8ff0 100644
--- a/security_scanning/examples/models/core/gemma/poetry.lock
+++ b/security_scanning/examples/models/core/gemma/poetry.lock
@@ -2748,13 +2748,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/glm-4-9b/poetry.lock b/security_scanning/examples/models/core/glm-4-9b/poetry.lock
index 4bcc69beb5..4c278b6e73 100644
--- a/security_scanning/examples/models/core/glm-4-9b/poetry.lock
+++ b/security_scanning/examples/models/core/glm-4-9b/poetry.lock
@@ -1923,13 +1923,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/gpt/poetry.lock b/security_scanning/examples/models/core/gpt/poetry.lock
index fa46e1e5db..9175f8b9ec 100644
--- a/security_scanning/examples/models/core/gpt/poetry.lock
+++ b/security_scanning/examples/models/core/gpt/poetry.lock
@@ -1831,13 +1831,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/llama/poetry.lock b/security_scanning/examples/models/core/llama/poetry.lock
index 63461d261f..377d1ba39f 100644
--- a/security_scanning/examples/models/core/llama/poetry.lock
+++ b/security_scanning/examples/models/core/llama/poetry.lock
@@ -1874,13 +1874,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/mamba/poetry.lock b/security_scanning/examples/models/core/mamba/poetry.lock
index 1357ac01b9..2e5ea9eec8 100644
--- a/security_scanning/examples/models/core/mamba/poetry.lock
+++ b/security_scanning/examples/models/core/mamba/poetry.lock
@@ -1874,13 +1874,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/mixtral/poetry.lock b/security_scanning/examples/models/core/mixtral/poetry.lock
index 723716366e..c000cfcdd5 100644
--- a/security_scanning/examples/models/core/mixtral/poetry.lock
+++ b/security_scanning/examples/models/core/mixtral/poetry.lock
@@ -1315,13 +1315,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/mllama/poetry.lock b/security_scanning/examples/models/core/mllama/poetry.lock
index 29d28af449..43a87a1c89 100644
--- a/security_scanning/examples/models/core/mllama/poetry.lock
+++ b/security_scanning/examples/models/core/mllama/poetry.lock
@@ -1800,13 +1800,13 @@ typing-extensions = ">=4.12.0"
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/nemotron/poetry.lock b/security_scanning/examples/models/core/nemotron/poetry.lock
index a9fec7edd0..350b5d05f7 100644
--- a/security_scanning/examples/models/core/nemotron/poetry.lock
+++ b/security_scanning/examples/models/core/nemotron/poetry.lock
@@ -1753,13 +1753,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/phi/poetry.lock b/security_scanning/examples/models/core/phi/poetry.lock
index db07266c65..074cfc6f1b 100644
--- a/security_scanning/examples/models/core/phi/poetry.lock
+++ b/security_scanning/examples/models/core/phi/poetry.lock
@@ -1816,13 +1816,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/qwen/poetry.lock b/security_scanning/examples/models/core/qwen/poetry.lock
index 95f4cf1e21..7f07926aee 100644
--- a/security_scanning/examples/models/core/qwen/poetry.lock
+++ b/security_scanning/examples/models/core/qwen/poetry.lock
@@ -3452,13 +3452,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/qwen2audio/poetry.lock b/security_scanning/examples/models/core/qwen2audio/poetry.lock
index 6ff53e9c23..1e9fab8dc6 100644
--- a/security_scanning/examples/models/core/qwen2audio/poetry.lock
+++ b/security_scanning/examples/models/core/qwen2audio/poetry.lock
@@ -1971,13 +1971,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/qwenvl/poetry.lock b/security_scanning/examples/models/core/qwenvl/poetry.lock
index a19aa6b0d0..6ca5cc273e 100644
--- a/security_scanning/examples/models/core/qwenvl/poetry.lock
+++ b/security_scanning/examples/models/core/qwenvl/poetry.lock
@@ -2927,13 +2927,13 @@ telegram = ["requests"]
 
 [[package]]
 name = "transformers"
-version = "5.0.0rc1"
+version = "5.0.0rc2"
 description = "Transformers: the model-definition framework for state-of-the-art machine learning models in text, vision, audio, and multimodal models, for both inference and training."
 optional = false
 python-versions = ">=3.10.0"
 files = [
-    {file = "transformers-5.0.0rc1-py3-none-any.whl", hash = "sha256:8b9604700769872cab4280dbcde201f557e93f72ee5a85c4592275ab4f15d330"},
-    {file = "transformers-5.0.0rc1.tar.gz", hash = "sha256:1fdde557b96ef8ea277c45b8e0d558f1e167fe28a98593f4c4aec0277e335821"},
+    {file = "transformers-5.0.0rc2-py3-none-any.whl", hash = "sha256:f8f2a14060ab11f20a0eec39d827af54c1589c327c5799d82808ae3f4167418a"},
+    {file = "transformers-5.0.0rc2.tar.gz", hash = "sha256:9f2fa5e132433dd7eb910dc224b32de0baf758f3b6ffc918dbb632e0af85c07a"},
 ]
 
 [package.dependencies]
@@ -2951,15 +2951,15 @@ typer-slim = "*"
 
 [package.extras]
 accelerate = ["accelerate (>=1.1.0)"]
-all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "codecarbon (>=2.8.1)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"]
+all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "codecarbon (>=2.8.1)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"]
 audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
 benchmark = ["optimum-benchmark (>=0.3.0)"]
 chat-template = ["jinja2 (>=3.1.0)", "jmespath (>=1.0.1)"]
 codecarbon = ["codecarbon (>=2.8.1)"]
 deepspeed = ["accelerate (>=1.1.0)", "deepspeed (>=0.9.3)"]
 deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "optuna", "parameterized (>=0.9)", "protobuf", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"]
-dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"]
-dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"]
+dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"]
+dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"]
 ftfy = ["ftfy"]
 hf-xet = ["hf_xet"]
 hub-kernels = ["kernels (>=0.10.2,<0.11)"]
@@ -2982,7 +2982,7 @@ sklearn = ["scikit-learn"]
 speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
 testing = ["GitPython (<3.1.19)", "accelerate (>=1.1.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "parameterized (>=0.9)", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"]
 tiktoken = ["blobfile", "tiktoken"]
-timm = ["timm (!=1.0.18,<=1.0.19)"]
+timm = ["timm (>=1.0.23)"]
 tokenizers = ["tokenizers (>=0.22.0,<=0.23.0)"]
 torch = ["accelerate (>=1.1.0)", "torch (>=2.2)"]
 torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
@@ -3074,13 +3074,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/recurrentgemma/poetry.lock b/security_scanning/examples/models/core/recurrentgemma/poetry.lock
index 740545c551..fd983afd51 100644
--- a/security_scanning/examples/models/core/recurrentgemma/poetry.lock
+++ b/security_scanning/examples/models/core/recurrentgemma/poetry.lock
@@ -2515,13 +2515,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/models/core/whisper/poetry.lock b/security_scanning/examples/models/core/whisper/poetry.lock
index 0f1e21f781..fd7e7ec9f3 100644
--- a/security_scanning/examples/models/core/whisper/poetry.lock
+++ b/security_scanning/examples/models/core/whisper/poetry.lock
@@ -2873,13 +2873,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/ngram/poetry.lock b/security_scanning/examples/ngram/poetry.lock
index 60b7e78b1e..3bfc89605a 100644
--- a/security_scanning/examples/ngram/poetry.lock
+++ b/security_scanning/examples/ngram/poetry.lock
@@ -1821,13 +1821,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/quantization/poetry.lock b/security_scanning/examples/quantization/poetry.lock
index fa3dfba0e4..96d2ed9066 100644
--- a/security_scanning/examples/quantization/poetry.lock
+++ b/security_scanning/examples/quantization/poetry.lock
@@ -1855,13 +1855,13 @@ telegram = ["requests"]
 
 [[package]]
 name = "transformers"
-version = "5.0.0rc1"
+version = "5.0.0rc2"
 description = "Transformers: the model-definition framework for state-of-the-art machine learning models in text, vision, audio, and multimodal models, for both inference and training."
 optional = false
 python-versions = ">=3.10.0"
 files = [
-    {file = "transformers-5.0.0rc1-py3-none-any.whl", hash = "sha256:8b9604700769872cab4280dbcde201f557e93f72ee5a85c4592275ab4f15d330"},
-    {file = "transformers-5.0.0rc1.tar.gz", hash = "sha256:1fdde557b96ef8ea277c45b8e0d558f1e167fe28a98593f4c4aec0277e335821"},
+    {file = "transformers-5.0.0rc2-py3-none-any.whl", hash = "sha256:f8f2a14060ab11f20a0eec39d827af54c1589c327c5799d82808ae3f4167418a"},
+    {file = "transformers-5.0.0rc2.tar.gz", hash = "sha256:9f2fa5e132433dd7eb910dc224b32de0baf758f3b6ffc918dbb632e0af85c07a"},
 ]
 
 [package.dependencies]
@@ -1879,15 +1879,15 @@ typer-slim = "*"
 
 [package.extras]
 accelerate = ["accelerate (>=1.1.0)"]
-all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "codecarbon (>=2.8.1)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"]
+all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "codecarbon (>=2.8.1)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"]
 audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
 benchmark = ["optimum-benchmark (>=0.3.0)"]
 chat-template = ["jinja2 (>=3.1.0)", "jmespath (>=1.0.1)"]
 codecarbon = ["codecarbon (>=2.8.1)"]
 deepspeed = ["accelerate (>=1.1.0)", "deepspeed (>=0.9.3)"]
 deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "optuna", "parameterized (>=0.9)", "protobuf", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"]
-dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"]
-dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"]
+dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jinja2 (>=3.1.0)", "jmespath (>=1.0.1)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"]
+dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=1.1.0)", "accelerate (>=1.1.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.10.2,<0.11)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (>=1.0.23)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"]
 ftfy = ["ftfy"]
 hf-xet = ["hf_xet"]
 hub-kernels = ["kernels (>=0.10.2,<0.11)"]
@@ -1910,7 +1910,7 @@ sklearn = ["scikit-learn"]
 speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
 testing = ["GitPython (<3.1.19)", "accelerate (>=1.1.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.4.6)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "parameterized (>=0.9)", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0,<9.0.0)", "pytest-asyncio (>=1.2.0)", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"]
 tiktoken = ["blobfile", "tiktoken"]
-timm = ["timm (!=1.0.18,<=1.0.19)"]
+timm = ["timm (>=1.0.23)"]
 tokenizers = ["tokenizers (>=0.22.0,<=0.23.0)"]
 torch = ["accelerate (>=1.1.0)", "torch (>=2.2)"]
 torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
@@ -1974,13 +1974,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
-    {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
+    {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
+    {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
 ]
 
 [package.extras]
diff --git a/security_scanning/examples/ray_orchestrator/poetry.lock b/security_scanning/examples/ray_orchestrator/poetry.lock
index b6c048c435..bfa715a5b1 100644
--- a/security_scanning/examples/ray_orchestrator/poetry.lock
+++ b/security_scanning/examples/ray_orchestrator/poetry.lock
@@ -730,20 +730,20 @@ type = ["mypy (<1.19)", "pytest-mypy (>=1.0.1)"]
 
 [[package]]
 name = "jsonschema"
-version = "4.25.1"
+version = "4.26.0"
 description = "An implementation of JSON Schema validation for Python"
 optional = false
-python-versions = ">=3.9"
+python-versions = ">=3.10"
 files = [
-    {file = "jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63"},
-    {file = "jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85"},
+    {file = "jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce"},
+    {file = "jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326"},
 ]
 
 [package.dependencies]
 attrs = ">=22.2.0"
 jsonschema-specifications = ">=2023.03.6"
 referencing = ">=0.28.4"
-rpds-py = ">=0.7.1"
+rpds-py = ">=0.25.0"
 
 [package.extras]
 format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
@@ -1891,13 +1891,13 @@ typing-extensions = ">=4.12.0"
 
 [[package]]
 name = "urllib3"
-version = "2.6.2"
+version = "2.6.3"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, - {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, + {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, + {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, ] [package.extras] @@ -1908,18 +1908,18 @@ zstd = ["backports-zstd (>=1.0.0)"] [[package]] name = "virtualenv" -version = "20.35.4" +version = "20.36.0" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" files = [ - {file = "virtualenv-20.35.4-py3-none-any.whl", hash = "sha256:c21c9cede36c9753eeade68ba7d523529f228a403463376cf821eaae2b650f1b"}, - {file = "virtualenv-20.35.4.tar.gz", hash = "sha256:643d3914d73d3eeb0c552cbb12d7e82adf0e504dbf86a3182f8771a153a1971c"}, + {file = "virtualenv-20.36.0-py3-none-any.whl", hash = "sha256:e7ded577f3af534fd0886d4ca03277f5542053bedb98a70a989d3c22cfa5c9ac"}, + {file = "virtualenv-20.36.0.tar.gz", hash = "sha256:a3601f540b515a7983508113f14e78993841adc3d83710fa70f0ac50f43b23ed"}, ] [package.dependencies] distlib = ">=0.3.7,<1" -filelock = ">=3.12.2,<4" +filelock = {version = ">=3.20.1,<4", markers = "python_version >= \"3.10\""} platformdirs = ">=3.9.1,<5" typing-extensions = {version = ">=4.13.2", markers = "python_version < \"3.11\""} diff --git a/security_scanning/examples/redrafter/poetry.lock b/security_scanning/examples/redrafter/poetry.lock index fa46e1e5db..9175f8b9ec 100644 --- a/security_scanning/examples/redrafter/poetry.lock +++ b/security_scanning/examples/redrafter/poetry.lock @@ -1831,13 +1831,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.2" +version = "2.6.3" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, - {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, + {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, + {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, ] [package.extras] diff --git a/security_scanning/examples/serve/poetry.lock b/security_scanning/examples/serve/poetry.lock index e24bcb08ca..6138f057f9 100644 --- a/security_scanning/examples/serve/poetry.lock +++ b/security_scanning/examples/serve/poetry.lock @@ -2600,13 +2600,13 @@ test = ["coverage", "pytest", "pytest-cov"] [[package]] name = "urllib3" -version = "2.6.2" +version = "2.6.3" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, - {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, + {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, + {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, ] [package.extras] diff --git a/security_scanning/examples/trtllm-eval/poetry.lock b/security_scanning/examples/trtllm-eval/poetry.lock index f13e70ad4a..28c200491c 100644 --- a/security_scanning/examples/trtllm-eval/poetry.lock +++ b/security_scanning/examples/trtllm-eval/poetry.lock @@ -3275,13 +3275,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.2" +version = "2.6.3" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, - {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, + {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, + {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, ] [package.extras] diff --git a/security_scanning/metadata.json b/security_scanning/metadata.json index 567d8ffa7a..0acdbb16e9 100644 --- a/security_scanning/metadata.json +++ b/security_scanning/metadata.json @@ -1,4 +1,4 @@ { - "commit_hash": "6095c80e560c612b8a8f4ff70e8930edf7798d21", - "timestamp": "2026-01-07T02:39:54Z" + "commit_hash": "b85c447ceb1ff91c5d4df6b71de2256a5fabfe9d", + "timestamp": "2026-01-08T02:42:38Z" } diff --git a/security_scanning/poetry.lock b/security_scanning/poetry.lock index 6d04c2d679..4ef7b31b7a 100644 --- a/security_scanning/poetry.lock +++ b/security_scanning/poetry.lock @@ -1838,20 +1838,20 @@ files = [ [[package]] name = "jsonschema" -version = "4.25.1" +version = "4.26.0" description = "An implementation of JSON Schema validation for Python" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" files = [ - {file = "jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63"}, - {file = "jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85"}, + {file = "jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce"}, + {file = "jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326"}, ] [package.dependencies] attrs = ">=22.2.0" jsonschema-specifications = ">=2023.03.6" referencing = ">=0.28.4" -rpds-py = ">=0.7.1" +rpds-py = ">=0.25.0" [package.extras] format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] @@ -3852,13 +3852,13 @@ xmp = ["defusedxml"] [[package]] name = "plotly" -version = "6.5.0" +version = "6.5.1" description = "An open-source interactive data visualization library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "plotly-6.5.0-py3-none-any.whl", hash = 
"sha256:5ac851e100367735250206788a2b1325412aa4a4917a4fe3e6f0bc5aa6f3d90a"}, - {file = "plotly-6.5.0.tar.gz", hash = "sha256:d5d38224883fd38c1409bef7d6a8dc32b74348d39313f3c52ca998b8e447f5c8"}, + {file = "plotly-6.5.1-py3-none-any.whl", hash = "sha256:5adad4f58c360612b6c5ce11a308cdbc4fd38ceb1d40594a614f0062e227abe1"}, + {file = "plotly-6.5.1.tar.gz", hash = "sha256:b0478c8d5ada0c8756bce15315bcbfec7d3ab8d24614e34af9aff7bfcfea9281"}, ] [package.dependencies] @@ -5930,13 +5930,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.2" +version = "2.6.3" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, - {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, + {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, + {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, ] [package.extras] @@ -6339,4 +6339,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "04c93699fd51f42c19b5111e408e5240d4dc0616a9d2501c7d159c01b3331ca2" +content-hash = "dab9694d64d1c91b512eb62bbd31da9d0cdb8c93e99941a7022f2f46aea905e3" diff --git a/security_scanning/pyproject.toml b/security_scanning/pyproject.toml index bc7ac1222e..f8addf4b75 100644 --- a/security_scanning/pyproject.toml +++ b/security_scanning/pyproject.toml @@ -59,7 +59,7 @@ einops = "^0.8.1" flashinfer-python = ">=0.3.0,<0.4.0" xgrammar = "0.1.25" llguidance = "0.7.29" -jsonschema = "^4.25.1" +jsonschema = "^4.26.0" backoff = "^2.2.1" nvtx = "^0.2.14" matplotlib = "^3.10.8" @@ -73,7 +73,7 @@ tiktoken = "^0.12.0" blobfile = "^3.1.0" openai-harmony = "0.0.4" nvidia-cutlass-dsl = "4.3.4" -plotly = "^6.5.0" +plotly = "^6.5.1" numexpr = "<2.14.0" partial-json-parser = "^0.2.1.1.post7" torch-c-dlpack-ext = "0.1.3" diff --git a/security_scanning/triton_backend/poetry.lock b/security_scanning/triton_backend/poetry.lock index 6fa35b5ad7..2c00043517 100644 --- a/security_scanning/triton_backend/poetry.lock +++ b/security_scanning/triton_backend/poetry.lock @@ -982,13 +982,13 @@ files = [ [[package]] name = "urllib3" -version = "2.6.2" +version = "2.6.3" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"}, - {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"}, + {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, + {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, ] [package.extras] From 30f8455d295cd287ef11e5777d33312bddb2c595 Mon Sep 17 00:00:00 2001 From: Lucas Liebenwein <11156568+lucaslie@users.noreply.github.com> Date: Wed, 7 Jan 2026 23:33:45 -0500 Subject: [PATCH 07/16] [https://nvbugs/5747878][fix] unwaive llama4 scout tests (#10468) Signed-off-by: Lucas Liebenwein <11156568+lucaslie@users.noreply.github.com> --- tests/integration/test_lists/waives.txt | 3 --- .../_torch/auto_deploy/_utils_test/_model_test_utils.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index f211cdb1fd..469772a681 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -403,8 +403,6 @@ examples/test_phi.py::test_llm_phi_quantization_1gpu[phi-2-fp8-bfloat16] SKIP (h test_e2e.py::test_trtllm_bench_llmapi_launch[pytorch_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5744432) test_e2e.py::test_trtllm_serve_multimodal_example SKIP (https://nvbugs/5747920) test_e2e.py::test_trtllm_serve_example SKIP (https://nvbugs/5747938) -unittest/_torch/auto_deploy/unit/singlegpu/test_ad_build_small_single.py::test_build_ad[meta-llama/Llama-4-Scout-17B-16E-Instruct-llm_extra_args8] SKIP (https://nvbugs/5747878) -unittest/_torch/auto_deploy/unit/singlegpu/test_ad_build_small_single.py::test_build_ad[meta-llama/Llama-4-Scout-17B-16E-Instruct-llm_extra_args9] SKIP (https://nvbugs/5747878) triton_server/test_triton.py::test_opt[opt] SKIP (https://nvbugs/5739981) unittest/llmapi/test_llm_pytorch.py::test_tinyllama_logits_processor[False] SKIP (https://nvbugs/5771838) unittest/llmapi/test_llm_pytorch.py::test_tinyllama_logits_processor[True] SKIP (https://nvbugs/5771838) @@ -415,7 +413,6 @@ accuracy/test_cli_flow.py::TestPhi3Mini128kInstruct::test_auto_dtype SKIP (https accuracy/test_cli_flow.py::TestPhi3Small8kInstruct::test_auto_dtype SKIP (https://nvbugs/5744293) accuracy/test_cli_flow.py::TestPhi3Small128kInstruct::test_auto_dtype SKIP (https://nvbugs/5744293) accuracy/test_cli_flow.py::TestPhi3_5MiniInstruct::test_auto_dtype SKIP (https://nvbugs/5744293) -unittest/_torch/auto_deploy/unit/singlegpu/models/test_llama4_vlm_patch.py::test_build_run_llama4_vlm SKIP (https://nvbugs/5747878) accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True-moe_backend=TRTLLM] SKIP (https://nvbugs/5740377) cpp/test_multi_gpu.py::TestDisagg::test_symmetric_executor[gpt-2proc-mpi_kvcache-90] SKIP (https://nvbugs/5755941) examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-BertForQuestionAnswering-bert/bert-base-cased-squad2] SKIP (https://nvbugs/5608979) diff --git a/tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py b/tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py index 5adb8e5a73..a71a09b465 100644 --- 
a/tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py +++ b/tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py @@ -393,7 +393,7 @@ _SMALL_MODEL_CONFIGS = { }, }, "meta-llama/Llama-4-Scout-17B-16E-Instruct": { - "llm_models_subdir": "Llama-4-Scout-17B-16E-Instruct", + "llm_models_subdir": "llama4-models/Llama-4-Scout-17B-16E-Instruct", "model_factory": "AutoModelForImageTextToText", "model_kwargs": { "text_config": { From f57aab5255159b63bc0f35916cbf6d93ab7de631 Mon Sep 17 00:00:00 2001 From: Barry Kang <43644113+Barry-Delaney@users.noreply.github.com> Date: Thu, 8 Jan 2026 14:58:55 +0800 Subject: [PATCH 08/16] [https://nvbugs/5775402][fix] Fix concurrency list in Wide-EP perf tests (#10529) Signed-off-by: Barry Kang <43644113+Barry-Delaney@users.noreply.github.com> --- ...22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml | 2 +- ...A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml | 2 +- ...2B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml | 2 +- ...22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml | 2 +- ...-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml | 2 +- ...k-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml | 2 +- ...r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml | 2 +- ...-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml | 2 +- ...-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml | 2 +- ...k-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml | 2 +- ...-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml | 2 +- ...k-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml | 2 +- ...v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml | 2 +- ...32-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml | 2 +- ...v32-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml | 2 +- ...v32-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml | 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml index 705e683aab..3c33b288e5 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-NIXL.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: 512 1075 + concurrency_list: 512 1024 input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml index 53dcaef3df..0a6135f34a 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx1_gen1_dep16_bs64_eplb288_mtp3_ccb-UCX.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: 512 1075 + concurrency_list: 512 1024 input_length: 1024 output_length: 1024 
dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml index 29251630c9..3c0b8d2e7a 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-NIXL.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '2150' + concurrency_list: '2048' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml index 409e2284c1..2dd7fd80b2 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/Qwen3-235B-A22B-FP4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp1_ccb-UCX.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '2150' + concurrency_list: '2048' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml index ce4527e18b..fedb8825b2 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '1075' + concurrency_list: '1024' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml index d1d2792ed4..5766454980 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-UCX.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '1075' + concurrency_list: '1024' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index f2f39b906f..4d4f8cb7db 100644 --- 
a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '2150' + concurrency_list: '2048' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml index 58df066b58..fc12422943 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-UCX.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '2150' + concurrency_list: '2048' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml index bdb222ece4..60b022a21d 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '1075' + concurrency_list: '1024' input_length: 8192 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml index e1330be1ed..f4cfcda4e6 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-UCX.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '1075' + concurrency_list: '1024' input_length: 8192 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml index 2fc065b480..dafc6a7df7 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '538' + concurrency_list: '512' input_length: 8192 output_length: 1024 dataset_file: diff --git 
a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml index b4efd20397..bb54d661a5 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-r1-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-UCX.yaml @@ -22,7 +22,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '538' + concurrency_list: '512' input_length: 8192 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml index 451a995e30..4dd4d7fb46 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx1_gen1_dep32_bs32_eplb288_mtp0_ccb-NIXL.yaml @@ -23,7 +23,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '1075' + concurrency_list: '1024' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml index f67ff56f88..ca80042c69 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_1k1k_ctx2_gen1_dep16_bs128_eplb288_mtp3_ccb-NIXL.yaml @@ -23,7 +23,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '2150' + concurrency_list: '2048' input_length: 1024 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml index 4d3a716c67..c262e3f661 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml +++ b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx6_gen1_dep16_bs64_eplb288_mtp0_ccb-NIXL.yaml @@ -23,7 +23,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '1075' + concurrency_list: '1024' input_length: 8192 output_length: 1024 dataset_file: diff --git a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml index 441aebf189..0b7bc63e3f 100644 --- a/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml +++ 
b/tests/integration/defs/perf/disagg/test_configs/wideep/perf/deepseek-v32-fp4_8k1k_ctx8_gen1_dep32_bs16_eplb288_mtp3_ccb-NIXL.yaml @@ -23,7 +23,7 @@ benchmark: multi_round: 8 benchmark_ratio: 0.8 streaming: true - concurrency_list: '538' + concurrency_list: '512' input_length: 8192 output_length: 1024 dataset_file: From 22c81cb5fa2c37aa60768ef312abaf77baf8cb2e Mon Sep 17 00:00:00 2001 From: HuiGao-NV Date: Thu, 8 Jan 2026 15:15:30 +0800 Subject: [PATCH 09/16] [None][chore] Enable seg fault cases since one race condition is fixed (#10398) Signed-off-by: Hui Gao --- tests/integration/test_lists/waives.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index 469772a681..ec161196b8 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -382,8 +382,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUT accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_2gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826) accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_4gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826) unittest/llmapi/test_llm_pytorch.py::test_llm_reward_model SKIP (https://nvbugs/5670458) -accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass] SKIP (https://nvbugs/5740377) -accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_trtllm-torch_compile=False] SKIP (https://nvbugs/5740377) accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[llguidance-mtp_nextn=2] SKIP (https://nvbugs/5740075) accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=2] SKIP (https://nvbugs/5740075) unittest/_torch/modeling/test_modeling_out_of_tree.py::TestOutOfTree::test_llm_api[False] SKIP (https://nvbugs/5739981) @@ -395,8 +393,6 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_tr accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=True-attn_backend=FLASHINFER-torch_compile=True] SKIP (https://nvbugs/5741304) unittest/_torch/multi_gpu/test_allreduce.py::test_allreduce_fusion_patterns[2-residual_rms_norm_out_quant_fp8-hidden:7168-seqlen:8192] SKIP (https://nvbugs/5741392) unittest/executor/test_rpc.py::TestRpcCorrectness::test_incremental_task_async SKIP (https://nvbugs/5741476) -accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[pp4-fp8kv=True-attn_backend=TRTLLM-torch_compile=False] SKIP (https://nvbugs/5740377) -accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_cutlass-torch_compile=False] SKIP (https://nvbugs/5740377) examples/test_phi.py::test_phi_fp8_with_bf16_lora[phi-2] SKIP (https://nvbugs/5744293) examples/test_phi.py::test_llm_phi_1node_2gpus_summary[Phi-3.5-MoE-instruct-nb:1] SKIP (https://nvbugs/5744293) examples/test_phi.py::test_llm_phi_quantization_1gpu[phi-2-fp8-bfloat16] SKIP (https://nvbugs/5744293) @@ -413,7 +409,6 @@ accuracy/test_cli_flow.py::TestPhi3Mini128kInstruct::test_auto_dtype SKIP (https accuracy/test_cli_flow.py::TestPhi3Small8kInstruct::test_auto_dtype SKIP (https://nvbugs/5744293) accuracy/test_cli_flow.py::TestPhi3Small128kInstruct::test_auto_dtype SKIP (https://nvbugs/5744293) accuracy/test_cli_flow.py::TestPhi3_5MiniInstruct::test_auto_dtype SKIP (https://nvbugs/5744293) 
-accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True-moe_backend=TRTLLM] SKIP (https://nvbugs/5740377) cpp/test_multi_gpu.py::TestDisagg::test_symmetric_executor[gpt-2proc-mpi_kvcache-90] SKIP (https://nvbugs/5755941) examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-BertForQuestionAnswering-bert/bert-base-cased-squad2] SKIP (https://nvbugs/5608979) examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-RobertaForQuestionAnswering-bert/roberta-base-squad2] SKIP (https://nvbugs/5608979) @@ -482,7 +477,6 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B_Instruct_2507::test_skip_softmax_attention[target_sparsity_0.8] SKIP (https://nvbugs/5774869) accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B_Instruct_2507::test_skip_softmax_attention[target_sparsity_0.9] SKIP (https://nvbugs/5774869) triton_server/test_triton.py::test_llava_onevision[llava_onevision] SKIP (https://nvbugs/5775205) triton_server/test_triton.py::test_gpt_ib_lad[gpt-ib-lad] SKIP (https://nvbugs/5775223) -accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_trtllm-torch_compile=True] SKIP (https://nvbugs/5740377) unittest/_torch/modules/test_fused_moe.py::test_fused_moe_fp8_blockwise_cute_dsl_multi_gpu[MoEWeightLoadingMode.FUSED_GATE_UP_PROJ-DefaultMoeRoutingMethod-1] SKIP (https://nvbugs/5775256) unittest/_torch/auto_deploy/unit/multigpu/transformations/library/test_ep_sharding.py::test_ep_shard[3-2] SKIP (https://nvbugs/5777041) unittest/_torch/auto_deploy/unit/multigpu/transformations/library/test_ep_sharding.py::test_ep_shard[8-2] SKIP (https://nvbugs/5777041) From 8d4b09dac63ada893c90eb17a78d72affa581ec7 Mon Sep 17 00:00:00 2001 From: dongfengy <99041270+dongfengy@users.noreply.github.com> Date: Thu, 8 Jan 2026 15:30:53 +0800 Subject: [PATCH 10/16] [None][doc] Update GPTOSS Doc (#10536) Signed-off-by: Dongfeng Yu --- docs/source/models/supported-models.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/models/supported-models.md b/docs/source/models/supported-models.md index 2185e38473..332304f5d5 100644 --- a/docs/source/models/supported-models.md +++ b/docs/source/models/supported-models.md @@ -40,10 +40,11 @@ Note: Support for other models may vary. Features marked "N/A" are not applicabl | `Qwen3MoeForCausalLM` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | Yes | Yes | N/A | Yes | Yes | | `Qwen3NextForCausalLM` | Yes | Yes | No | Untested | Yes | No | No | No | Yes | Yes | No | No | Untested | Untested | | `Llama4ForConditionalGeneration` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | Yes | Untested | N/A | Yes | Yes | -| `GptOssForCausalLM` | Yes | Yes | Yes | Yes | No | No | Yes | No | Yes | Yes | No | N/A | Yes | Yes | +| `GptOssForCausalLM` | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes [^3] | Yes | Yes | Yes | N/A | Yes | Yes | [^1]: Chunked Prefill for MLA can only be enabled on SM100/SM103. [^2]: KV cache reuse for MLA can only be enabled on SM90/SM100/SM103 and in BF16/FP8 KV cache dtype. +[^3]: Overlap scheduler isn't supported when using EAGLE-3 (Two Model Engine) for GPT-OSS.
# Multimodal Feature Support Matrix (PyTorch Backend) From 43839c7d9b4b5d3898388b83dc58763e8eaba1ae Mon Sep 17 00:00:00 2001 From: Emma Qiao Date: Thu, 8 Jan 2026 15:33:48 +0800 Subject: [PATCH 11/16] [TRTLLM-9642][infra] Increase pytest verbosity for failed tests (#9657) Signed-off-by: qqiao Signed-off-by: Emma Qiao --- jenkins/L0_Test.groovy | 2 +- tests/integration/defs/test_unittests.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index f3a8226167..a6fef33a63 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -808,7 +808,7 @@ def getPytestBaseCommandLine( portEnvVars, pytestUtil, "pytest", - "-v", + "-vv", testFilter[(DETAILED_LOG)] ? "-s" : "", "--timeout-method=thread", "--apply-test-list-correction", diff --git a/tests/integration/defs/test_unittests.py b/tests/integration/defs/test_unittests.py index d2cd80692c..707426f822 100644 --- a/tests/integration/defs/test_unittests.py +++ b/tests/integration/defs/test_unittests.py @@ -126,8 +126,8 @@ def test_unittests_v2(llm_root, llm_venv, case: str, output_dir, request): f'results-sub-unittests-{case_fn}.xml') command = [ - '-m', 'pytest', ignore_opt, "-v", "--tb=short", "-rF", "--timeout=2400", - "--timeout-method=thread" + '-m', 'pytest', ignore_opt, "-vv", "--tb=short", "-rF", + "--timeout=2400", "--timeout-method=thread" ] if test_prefix: command += [f"--test-prefix={test_prefix}"] From dc6b743fb69f4b3b0030d13e533145fe3185265b Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Thu, 8 Jan 2026 17:51:44 +0800 Subject: [PATCH 12/16] [None][chore] Bump version to 1.2.0rc8 (#10542) Signed-off-by: Yiqing Yan --- README.md | 2 +- examples/constraints.txt | 2 +- tensorrt_llm/version.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c1476bfef1..9e78da9a47 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ state-of-the-art optimizations to perform inference efficiently on NVIDIA GPUs.< [![python](https://img.shields.io/badge/python-3.10-green)](https://www.python.org/downloads/release/python-31012/) [![cuda](https://img.shields.io/badge/cuda-13.0.0-green)](https://developer.nvidia.com/cuda-downloads) [![torch](https://img.shields.io/badge/torch-2.9.0-green)](https://pytorch.org) -[![version](https://img.shields.io/badge/release-1.2.0rc7-green)](https://github.com/NVIDIA/TensorRT-LLM/blob/main/tensorrt_llm/version.py) +[![version](https://img.shields.io/badge/release-1.2.0rc8-green)](https://github.com/NVIDIA/TensorRT-LLM/blob/main/tensorrt_llm/version.py) [![license](https://img.shields.io/badge/license-Apache%202-blue)](https://github.com/NVIDIA/TensorRT-LLM/blob/main/LICENSE) [Architecture](https://nvidia.github.io/TensorRT-LLM/developer-guide/overview.html)   |   [Performance](https://nvidia.github.io/TensorRT-LLM/developer-guide/perf-overview.html)   |   [Examples](https://nvidia.github.io/TensorRT-LLM/quick-start-guide.html)   |   [Documentation](https://nvidia.github.io/TensorRT-LLM/)   |   [Roadmap](https://github.com/NVIDIA/TensorRT-LLM/issues?q=is%3Aissue%20state%3Aopen%20label%3Aroadmap) diff --git a/examples/constraints.txt b/examples/constraints.txt index 2637847500..02571fa1b6 100644 --- a/examples/constraints.txt +++ b/examples/constraints.txt @@ -1,3 +1,3 @@ -tensorrt_llm==1.2.0rc7 +tensorrt_llm==1.2.0rc8 evaluate~=0.4.1 rouge_score~=0.1.2 diff --git a/tensorrt_llm/version.py b/tensorrt_llm/version.py index 504baf79de..434ba8f8e2 100644 --- a/tensorrt_llm/version.py +++ 
b/tensorrt_llm/version.py @@ -12,4 +12,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.2.0rc7" +__version__ = "1.2.0rc8" From bea61bb17d7e7b15d1ffc95e47606af5e201598d Mon Sep 17 00:00:00 2001 From: bhsueh_NV <11360707+byshiue@users.noreply.github.com> Date: Thu, 8 Jan 2026 19:38:49 +0800 Subject: [PATCH 13/16] [None][fix] Mistral Large 3: a few code refinements (#10405) Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com> --- .../models/core/mistral_large_3/README.md | 3 +- .../checkpoints/mistral/config_loader.py | 7 +- .../_torch/models/modeling_mistral.py | 64 +++++++++++-------- tensorrt_llm/evaluate/lm_eval.py | 31 ++++++--- tensorrt_llm/inputs/utils.py | 6 -- .../defs/accuracy/accuracy_core.py | 2 + .../defs/accuracy/references/mmlu.yaml | 4 +- .../defs/accuracy/references/mmmu.yaml | 3 +- .../test_llm_api_pytorch_multimodal.py | 15 ++++- .../test-db/l0_gb200_multi_gpus.yml | 4 +- tests/integration/test_lists/waives.txt | 3 - 11 files changed, 84 insertions(+), 58 deletions(-) diff --git a/examples/models/core/mistral_large_3/README.md b/examples/models/core/mistral_large_3/README.md index 5ea730c9f1..da219bf7b0 100644 --- a/examples/models/core/mistral_large_3/README.md +++ b/examples/models/core/mistral_large_3/README.md @@ -19,7 +19,8 @@ mpirun -n 1 --allow-run-as-root --oversubscribe python3 examples/llm-api/quickst --max_tokens 100 \ --checkpoint_format mistral \ --model_type mistral_large_3 \ - --moe_backend TRTLLM + --moe_backend TRTLLM \ + --image_format pil ``` ## LLM-only run diff --git a/tensorrt_llm/_torch/models/checkpoints/mistral/config_loader.py b/tensorrt_llm/_torch/models/checkpoints/mistral/config_loader.py index b72cb6da38..c679734fcf 100644 --- a/tensorrt_llm/_torch/models/checkpoints/mistral/config_loader.py +++ b/tensorrt_llm/_torch/models/checkpoints/mistral/config_loader.py @@ -103,17 +103,14 @@ def _remap_mistral_yarn_args(config: dict) -> dict: "apply_scale": "apply_yarn_scaling", } yarn_config = config.get("yarn") or {} - config["rope_parameters"] = { + config["rope_scaling"] = { "rope_type": "yarn", "mscale_all_dim": 1, } - if rope_theta := config.pop("rope_theta", None): - config["rope_parameters"]["rope_theta"] = rope_theta - for old_name, new_name in yarn_config_map.items(): if old_name in yarn_config: - config["rope_parameters"][new_name] = yarn_config.pop(old_name) + config["rope_scaling"][new_name] = yarn_config.pop(old_name) assert len(yarn_config) == 0, f"Unparsed yarn config: {yarn_config}" diff --git a/tensorrt_llm/_torch/models/modeling_mistral.py b/tensorrt_llm/_torch/models/modeling_mistral.py index ea06b5e100..99ff8169c1 100644 --- a/tensorrt_llm/_torch/models/modeling_mistral.py +++ b/tensorrt_llm/_torch/models/modeling_mistral.py @@ -46,6 +46,7 @@ from tensorrt_llm.inputs import (BaseMultimodalDummyInputsBuilder, MultimodalPlaceholderPlacement, TextPrompt, register_input_processor) from tensorrt_llm.inputs.multimodal import MultimodalParams +from tensorrt_llm.inputs.utils import encode_base64_image from tensorrt_llm.llmapi import SamplingParams from tensorrt_llm.logger import logger @@ -58,16 +59,28 @@ class MistralAttention(Attention): layer_idx: int | None = None, ): config = model_config.pretrained_config + rope_params = RopeParams.from_config(config) + rope_params_section = getattr(config, "rope_scaling", None) or getattr( + config, "rope_parameters", None) + rope_type =
getattr(rope_params_section, "rope_type", None) + if rope_type == "yarn": + pos_embd_params = PositionalEmbeddingParams( + type=PositionEmbeddingType.yarn, + rope=rope_params, + is_neox=False) + else: + pos_embd_params = PositionalEmbeddingParams( + type=PositionEmbeddingType.rope_gpt_neox, + rope=rope_params, + ) + super().__init__( hidden_size=config.hidden_size, num_attention_heads=config.num_attention_heads, num_key_value_heads=config.num_key_value_heads, max_position_embeddings=config.max_position_embeddings, bias=False, - pos_embd_params=PositionalEmbeddingParams( - type=PositionEmbeddingType.rope_gpt_neox, - rope=RopeParams.from_config(config), - ), + pos_embd_params=pos_embd_params, layer_idx=layer_idx, dtype=config.torch_dtype, config=model_config, @@ -266,20 +279,18 @@ class MistralCommonImageProcessor: } def get_num_tokens_per_image(self, image_sizes): - # FIXME avoid double loading with custom loader h, w = image_sizes ncols, nrows = self.image_processor._image_to_num_tokens( Image.new("RGB", (w, h))) return ncols * nrows + nrows - def __call__(self, text, images, media, **kwargs): - assert media is not None - if isinstance(media, str): - media = [media] - - mm_items = [{"type": "image_url", "image_url": url} for url in media] - - logger.debug(f"text: {text}") + def __call__(self, text, images, **kwargs): + mm_items = [] + if images: + mm_items = [{ + "type": "image", + "base64": encode_base64_image(image) + } for image in images] conversation = [{ "role": "user", @@ -292,19 +303,20 @@ class MistralCommonImageProcessor: encoded = self.tokenizer.transformers_tokenizer.apply_chat_template( conversation, tokenize=True, return_dict=True, return_tensors='pt') - logger.debug( - f"encoded.pixel_values.shape: {encoded.pixel_values.shape}, encoded.input_ids: {encoded.input_ids[0][-20:]}" - ) - logger.debug( - f"encoded.input_ids list: {self.tokenizer.transformers_tokenizer.apply_chat_template(conversation)}" - ) - processed = { "input_ids": encoded.input_ids, - "pixel_values": encoded.pixel_values.to(self.dtype), - "attention_mask": encoded.attention_mask, - "image_sizes": torch.tensor([encoded.pixel_values.shape[2:]]) } + + # text-only mode for VLM + if "pixel_values" in encoded: + processed.update({ + "pixel_values": + encoded.pixel_values.to(self.dtype), + "attention_mask": + encoded.attention_mask, + "image_sizes": + torch.tensor([encoded.pixel_values.shape[2:]]) + }) return processed @@ -376,7 +388,6 @@ class Mistral3InputProcessor(BaseMultimodalInputProcessor, self, inputs: TextPrompt, sampling_params: SamplingParams ) -> Tuple[List[int], ExtraProcessedInputs | None]: images = inputs.get("multi_modal_data", {}).get("image") - mm_processor_kwargs = inputs.get("mm_processor_kwargs", {}) do_rescale = getattr(self.processor.image_processor, "do_rescale", False) if images is not None and isinstance(images[0], torch.Tensor): @@ -384,18 +395,15 @@ class Mistral3InputProcessor(BaseMultimodalInputProcessor, # format is "pt" (pytorch tensors), but not for "pil" (PIL images). do_rescale = False - if mm_processor_kwargs: - # Currently, we only support image modality in MistralCommonImageProcessor. 
+ if images is not None: processed = self.processor( text=inputs["prompt"], images=images, do_rescale=do_rescale, - **mm_processor_kwargs, ) else: processed = self.text_processor( text=inputs["prompt"], - images=images, do_rescale=do_rescale, ) input_ids = processed.pop("input_ids").tolist()[0] diff --git a/tensorrt_llm/evaluate/lm_eval.py b/tensorrt_llm/evaluate/lm_eval.py index a3a59c3f5f..4a877d75f4 100644 --- a/tensorrt_llm/evaluate/lm_eval.py +++ b/tensorrt_llm/evaluate/lm_eval.py @@ -52,7 +52,9 @@ class LmEvalWrapper(TemplateLM): llm: Union[LLM, PyTorchLLM], sampling_params: Optional[SamplingParams] = None, streaming: bool = False, - chat_template_kwargs: Optional[dict[str, Any]] = None): + chat_template_kwargs: Optional[dict[str, Any]] = None, + model_type: str | None = None, + is_force_single_image: bool = False): super().__init__() self.llm = llm self.sampling_params = sampling_params @@ -163,7 +165,9 @@ class MultimodalLmEvalWrapper(LmEvalWrapper): sampling_params: Optional[SamplingParams] = None, streaming: bool = False, max_images: int = 999, - chat_template_kwargs: Optional[dict[str, Any]] = None): + chat_template_kwargs: Optional[dict[str, Any]] = None, + model_type: str | None = None, + is_force_single_image: bool = False): """ Initialize the multimodal wrapper. @@ -179,7 +183,9 @@ class MultimodalLmEvalWrapper(LmEvalWrapper): self.MULTIMODAL = True self.max_images = max_images self.chat_template_kwargs = chat_template_kwargs - self.model_type = self._get_model_type(llm) + self.model_type = model_type if model_type is not None else self._get_model_type( + llm) + self.is_force_single_image = is_force_single_image # NOTE: In TRT-LLM, currently we do not support interleaved text and image. Instead, we are adding image placeholders at the end of the text or at the beginning of the text. # So, until we support interleaved text and image, we set this to False. @@ -287,9 +293,14 @@ class MultimodalLmEvalWrapper(LmEvalWrapper): prompt = prompt_inputs(prompt) # NOTE: Convert RGBA format to RGB format - images = [ - convert_image_mode(img, "RGB") for img in media_data["visual"] - ] + if self.is_force_single_image: + # NOTE: This is a workaround to force single image for models which only support single image. 
+ images = [convert_image_mode(media_data["visual"][0], "RGB")] + else: + images = [ + convert_image_mode(img, "RGB") + for img in media_data["visual"] + ] prompt["multi_modal_data"] = {"image": images} sampling_params = self._get_sampling_params(gen_kwargs) @@ -429,14 +440,18 @@ class LmEvalEvaluator(Evaluator): llm: Union[LLM, PyTorchLLM], sampling_params: Optional[SamplingParams] = None, streaming: bool = False, - scores_filter: str = None) -> float: + scores_filter: str = None, + model_type: str = None, + is_force_single_image: bool = False) -> float: import lm_eval lm_cls = MultimodalLmEvalWrapper if self.MULTIMODAL else LmEvalWrapper results = lm_eval.evaluate( lm=lm_cls(llm, sampling_params=sampling_params, streaming=streaming, - chat_template_kwargs=self.chat_template_kwargs), + chat_template_kwargs=self.chat_template_kwargs, + model_type=model_type, + is_force_single_image=is_force_single_image), task_dict=self.task_dict, limit=self.num_samples, apply_chat_template=self.apply_chat_template, diff --git a/tensorrt_llm/inputs/utils.py b/tensorrt_llm/inputs/utils.py index a6f7e49fa8..bbbd5f4f8f 100644 --- a/tensorrt_llm/inputs/utils.py +++ b/tensorrt_llm/inputs/utils.py @@ -774,12 +774,6 @@ def default_multimodal_input_loader( mm_placeholder_counts=[mm_placeholder_counts]) input = {"prompt": prompt} - # When the tokenizer is a MistralTokenizer, we need to keep the source media to handle in processor later. - from tensorrt_llm._torch.models.checkpoints.mistral.tokenizer import \ - MistralTokenizer - if isinstance(tokenizer, MistralTokenizer): - input["mm_processor_kwargs"] = {"media": media} - if mm_placeholder_counts: if mm_embeddings is not None: input[ diff --git a/tests/integration/defs/accuracy/accuracy_core.py b/tests/integration/defs/accuracy/accuracy_core.py index f96ac7d618..e30c6e2c2c 100644 --- a/tests/integration/defs/accuracy/accuracy_core.py +++ b/tests/integration/defs/accuracy/accuracy_core.py @@ -402,6 +402,8 @@ class MMMU(AccuracyTask): is_multimodal=True, apply_chat_template=True) + EVALUATE_KWARGS = dict(model_type=None, is_force_single_image=False) + class PassKeyRetrieval64k(AccuracyTask): DATASET = "passkey_retrieval_64k" diff --git a/tests/integration/defs/accuracy/references/mmlu.yaml b/tests/integration/defs/accuracy/references/mmlu.yaml index a0e38d67c1..9cbd7a9f73 100644 --- a/tests/integration/defs/accuracy/references/mmlu.yaml +++ b/tests/integration/defs/accuracy/references/mmlu.yaml @@ -345,9 +345,9 @@ mistralai/Mistral-Nemo-12b-Base: - quant_algo: FP8 accuracy: 69.66 mistral/Mistral-Large-3-675B: - - accuracy: 87.54 + - accuracy: 85.30 - spec_dec_algo: Eagle - accuracy: 87.54 + accuracy: 85.30 nvidia/Nemotron-Super-V3: - accuracy: 81.07 - quant_algo: NVFP4 diff --git a/tests/integration/defs/accuracy/references/mmmu.yaml b/tests/integration/defs/accuracy/references/mmmu.yaml index a2fb8f4a77..37819c3f14 100644 --- a/tests/integration/defs/accuracy/references/mmmu.yaml +++ b/tests/integration/defs/accuracy/references/mmmu.yaml @@ -25,4 +25,5 @@ microsoft/Phi-4-multimodal-instruct: Qwen/Qwen3-VL-30B-A3B-Instruct: - accuracy: 55.33 mistral/Mistral-Large-3-675B: - - accuracy: 60.00 +# Mistral Large 3 675B only supports single image input, so accuracy is lower. 
+ - accuracy: 47 diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch_multimodal.py b/tests/integration/defs/accuracy/test_llm_api_pytorch_multimodal.py index 78e0f3e401..c3a812b195 100644 --- a/tests/integration/defs/accuracy/test_llm_api_pytorch_multimodal.py +++ b/tests/integration/defs/accuracy/test_llm_api_pytorch_multimodal.py @@ -293,8 +293,19 @@ class TestMistralLarge3_675B(LlmapiAccuracyTestHarness): ], ) def test_nvfp4_4gpus( - self, tp_size, pp_size, ep_size, attention_dp, cuda_graph, overlap_scheduler, moe_backend + self, + tp_size, + pp_size, + ep_size, + attention_dp, + cuda_graph, + overlap_scheduler, + moe_backend, + mocker, ): + mocker.patch.dict( + MMMU.EVALUATE_KWARGS, {"model_type": "mistral_large_3", "is_force_single_image": True} + ) pytorch_config = dict( disable_overlap_scheduler=not overlap_scheduler, cuda_graph_config=CudaGraphConfig() if cuda_graph else None, @@ -315,4 +326,4 @@ class TestMistralLarge3_675B(LlmapiAccuracyTestHarness): kv_cache_config=kv_cache_config, ) as llm: task = MMMU(self.MODEL_NAME) - task.evaluate(llm, sampling_params=self.sampling_params, model_type="mistral_large_3") + task.evaluate(llm, sampling_params=self.sampling_params) diff --git a/tests/integration/test_lists/test-db/l0_gb200_multi_gpus.yml b/tests/integration/test_lists/test-db/l0_gb200_multi_gpus.yml index 2241aea415..62c0af24f8 100644 --- a/tests/integration/test_lists/test-db/l0_gb200_multi_gpus.yml +++ b/tests/integration/test_lists/test-db/l0_gb200_multi_gpus.yml @@ -72,7 +72,7 @@ l0_gb200_multi_gpus: - accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_nvfp4[tp4ep4-cutlass] - accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_nvfp4[no_cuda_graph_overlap-cutlass] - accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_nvfp4[tp4ep4-trtllm] - - accuracy/test_llm_api_pytorch.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm_eagle] TIMEOUT (90) + - accuracy/test_llm_api_pytorch_multimodal.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm] TIMEOUT (90) - condition: ranges: system_gpu_count: @@ -105,4 +105,4 @@ l0_gb200_multi_gpus: - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus_online_eplb[enable_configurable_moe-fp8] - accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4_4gpus[latency_moe_trtllm_eagle3] TIMEOUT (90) - accuracy/test_llm_api_pytorch.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm] TIMEOUT (90) - - accuracy/test_llm_api_pytorch_multimodal.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm] TIMEOUT (90) + - accuracy/test_llm_api_pytorch.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm_eagle] TIMEOUT (90) diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index ec161196b8..563a38a76e 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -441,8 +441,6 @@ test_e2e.py::test_ptp_quickstart_advanced_2gpus_sm120[Nemotron-Super-49B-v1-BF16 unittest/_torch/multi_gpu/test_mnnvl_allreduce.py::test_row_linear_residual_norm_fusion[no_fusion-strategy:8-dtype:bfloat16-hidden:8192-seqlen:[15]] SKIP (https://nvbugs/5761364) triton_server/test_triton.py::test_gpt_speculative_decoding[gpt-speculative-decoding] SKIP (https://nvbugs/5762854) accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B_Instruct_RocketKV::test_auto_dtype SKIP (https://nvbugs/5762822) -accuracy/test_llm_api_pytorch.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm] SKIP 
(https://nvbugs/5762852) -accuracy/test_llm_api_pytorch.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm_eagle] SKIP (https://nvbugs/5762852) unittest/_torch/sampler/test_return_logits.py SKIP (https://nvbugs/5764627) examples/serve/test_serve.py::test_config_file_loading[--config] SKIP (https://nvbugs/5754977) full:RTXPro6000D/accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-tp2pp2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5740075) @@ -456,7 +454,6 @@ full:sm89/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ngram SKIP (https://nvbugs/5768068) test_e2e.py::test_eagle3_output_consistency_4gpus[Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf-Qwen3/qwen3-235B-eagle3] SKIP (https://nvbugs/5685010) examples/test_mistral.py::test_mistral_with_bf16_lora_torch[mistral-7b-v0.1] SKIP (https://nvbugs/5769855) -accuracy/test_llm_api_pytorch_multimodal.py::TestMistralLarge3_675B::test_nvfp4_4gpus[latency_moe_trtllm] SKIP (TBD) accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-trtllm-fp8] SKIP (https://nvbugs/5772396) full:sm100/accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-trtllm-auto] SKIP (https://nvbugs/5772396) accuracy/test_llm_api_pytorch.py::TestGLM4_6::test_nvfp4_2_model_mtp[2model_trtllm] SKIP (https://nvbugs/5772360) From 6511dbaea0a9197a07f546e9bfc307a583c14fc8 Mon Sep 17 00:00:00 2001 From: Eran Geva <19514940+MrGeva@users.noreply.github.com> Date: Thu, 8 Jan 2026 13:43:41 +0200 Subject: [PATCH 14/16] [#10417][fix] AutoDeploy - Reverted to direct computation of minusA (#10509) Signed-off-by: Eran Geva <19514940+MrGeva@users.noreply.github.com> --- .../models/custom/modeling_nemotron_h.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/tensorrt_llm/_torch/auto_deploy/models/custom/modeling_nemotron_h.py b/tensorrt_llm/_torch/auto_deploy/models/custom/modeling_nemotron_h.py index 40dc215b34..259f997afd 100644 --- a/tensorrt_llm/_torch/auto_deploy/models/custom/modeling_nemotron_h.py +++ b/tensorrt_llm/_torch/auto_deploy/models/custom/modeling_nemotron_h.py @@ -113,11 +113,6 @@ class NemotronHMamba2Mixer(nn.Module): A = torch.arange(1, self.num_heads + 1) self.A_log = nn.Parameter(torch.log(A)) self.A_log._no_weight_decay = True - # Instead of recomputing `torch.exp(self.A_log.float())` on every forward pass, we will register a hook - # that sets this appropriately when loading weights. - # NOTE: we explicitly register this as a non-persistent buffer so that it does not appear in the state dict of - # this module, or an equivalent graph module trace from it, but still gets included in e.g. `to()` calls. - self.register_buffer("_minus_A", -A.float(), persistent=False) self.norm = MambaRMSNormGated( self.intermediate_size, eps=self.layer_norm_epsilon, @@ -129,8 +124,6 @@ self.out_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=config.use_bias) self.use_bias = config.use_bias - self.register_load_state_dict_post_hook(self._load_state_dict_post_hook) - def torch_forward(self, input_states): batch_size, seq_len, _ = input_states.shape dtype = input_states.dtype @@ -166,7 +159,7 @@ ) # 3.
From c0ae6bbdbea3a0b31b03769b6a362405d8adb844 Mon Sep 17 00:00:00 2001
From: William Zhang <133824995+2ez4bz@users.noreply.github.com>
Date: Thu, 8 Jan 2026 03:45:54 -0800
Subject: [PATCH 15/16] [None][feat] EPD for Qwen3 VL (#10470)

* Why?

  We would like to support EPD disaggregated serving for Qwen3 VL.

* What?

  This commit adds such support, and extends existing unit tests for
  correctness checks. Some minor (protected) interface changes had to be
  made to the weight mapper as a side effect.

Signed-off-by: William Zhang <133824995+2ez4bz@users.noreply.github.com>
---
 .../models/checkpoints/base_weight_mapper.py  |  11 +-
 .../checkpoints/hf/qwen3vl_weight_mapper.py   |  19 +++
 .../_torch/models/modeling_qwen3vl.py         | 110 ++++++++++++++++--
 .../multimodal/test_mm_encoder_standalone.py  |   5 +-
 4 files changed, 132 insertions(+), 13 deletions(-)

diff --git a/tensorrt_llm/_torch/models/checkpoints/base_weight_mapper.py b/tensorrt_llm/_torch/models/checkpoints/base_weight_mapper.py
index 4d78b3dcb1..790be65eed 100644
--- a/tensorrt_llm/_torch/models/checkpoints/base_weight_mapper.py
+++ b/tensorrt_llm/_torch/models/checkpoints/base_weight_mapper.py
@@ -29,9 +29,6 @@ class BaseWeightMapper(ABC):
             raise ValueError("model must have a config attribute")
 
         self._tp_size = 1 if model.model_config.mapping.enable_attention_dp else model.model_config.mapping.tp_size
-        self._head_dim = model.config.head_dim if hasattr(
-            model.config, 'head_dim'
-        ) and model.config.head_dim is not None else model.config.hidden_size // model.config.num_attention_heads
 
         self.map_weights()
 
@@ -173,3 +170,11 @@ class BaseWeightMapper(ABC):
         if self._model is None:
             raise RuntimeError("Weight mapper is not initialized")
         return self._model
+
+    @property
+    def _head_dim(self) -> int:
+        model = self.model
+        head_dim = model.config.head_dim if hasattr(
+            model.config, 'head_dim'
+        ) and model.config.head_dim is not None else model.config.hidden_size // model.config.num_attention_heads
+        return head_dim
diff --git a/tensorrt_llm/_torch/models/checkpoints/hf/qwen3vl_weight_mapper.py b/tensorrt_llm/_torch/models/checkpoints/hf/qwen3vl_weight_mapper.py
index 41b3da875e..24a3602db9 100644
--- a/tensorrt_llm/_torch/models/checkpoints/hf/qwen3vl_weight_mapper.py
+++ b/tensorrt_llm/_torch/models/checkpoints/hf/qwen3vl_weight_mapper.py
@@ -1,3 +1,8 @@
+from transformers.models.qwen3_vl.configuration_qwen3_vl import (
+    Qwen3VLTextConfig,
+    Qwen3VLVisionConfig,
+)
+
 from tensorrt_llm._torch.models.checkpoints.hf.weight_mapper import HfWeightMapper
 from tensorrt_llm._torch.models.modeling_utils import register_mapper
 
@@ -6,3 +11,17 @@ class Qwen3VLHfWeightMapper(HfWeightMapper):
     def preprocess_weights(self, weights: dict) -> dict:
         return weights
+
+    @property
+    def _head_dim(self) -> int:
+        config = self.model.config
+        if (head_dim := getattr(config, "head_dim", None)) is not None:
+            return head_dim
+        if isinstance(config, Qwen3VLTextConfig):
+            num_heads = config.num_attention_heads
+        elif isinstance(config, Qwen3VLVisionConfig):
+            num_heads = config.num_heads
+        else:
+            raise TypeError(f"Unexpected config class {type(config).__name__}.")
+
+        return config.hidden_size // num_heads
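Note on the two hunks above: _head_dim moves from an eagerly computed
attribute to a property, so subclasses such as Qwen3VLHfWeightMapper can
resolve it per config type (text vs. vision) at access time. A minimal
sketch of the dispatch, with the config classes stubbed out for
illustration (TextCfg/VisionCfg and their values are made up):

    class TextCfg:
        hidden_size, num_attention_heads = 4096, 32

    class VisionCfg:
        hidden_size, num_heads = 1152, 16

    def head_dim(config) -> int:
        # Prefer an explicit head_dim if the config carries one.
        if (hd := getattr(config, "head_dim", None)) is not None:
            return hd
        if isinstance(config, TextCfg):
            return config.hidden_size // config.num_attention_heads
        if isinstance(config, VisionCfg):
            return config.hidden_size // config.num_heads
        raise TypeError(f"Unexpected config class {type(config).__name__}.")

    assert head_dim(TextCfg()) == 128 and head_dim(VisionCfg()) == 72
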
diff --git a/tensorrt_llm/_torch/models/modeling_qwen3vl.py b/tensorrt_llm/_torch/models/modeling_qwen3vl.py
index f89d801f94..d073f6745b 100644
--- a/tensorrt_llm/_torch/models/modeling_qwen3vl.py
+++ b/tensorrt_llm/_torch/models/modeling_qwen3vl.py
@@ -25,6 +25,7 @@ from ...inputs import (
     MultimodalPlaceholderPlacement,
     TextPrompt,
     register_input_processor,
+    support_multimodal_disaggregated,
 )
 from ...inputs.multimodal import MultimodalParams
 from ...logger import logger
@@ -350,6 +351,85 @@ class Qwen3VLInputProcessorBase(BaseMultimodalInputProcessor, BaseMultimodalDumm
             "multimodal_data": multimodal_data,
         }
 
+    def get_prompt_token_ids(
+        self, inputs: TextPrompt, mm_handles: List[Dict[str, Any]]
+    ) -> Tuple[List[int], List[int], List[int]]:
+        """
+        Build input token ids with multimodal placeholders expanded to the number of MM tokens.
+
+        Args:
+            inputs: Text prompt input container. Must contain a non-empty prompt string.
+            mm_handles: List of multimodal embedding handles. Currently only a single handle is supported.
+
+        Returns:
+            Tuple[List[int], List[int], List[int]]:
+                - expanded_ids: token ids with each image token expanded to a placeholder repeated per MM token
+                - mm_token_length: per-image MM token lengths
+                - mm_token_offsets: start offsets (positions) for each image's MM tokens within expanded_ids
+        """
+        # TODO: Move this function to the base input processor class when extending for more models
+        text_prompt = inputs.get("prompt")
+        if not text_prompt:
+            raise ValueError("Text prompt is required but not provided")
+
+        if not isinstance(mm_handles, list):
+            raise TypeError("mm_handles must be a list")
+
+        if len(mm_handles) > 1:
+            # TODO: only support single multimodal item within a request for now
+            raise NotImplementedError("Only one mm_handle is supported for Qwen3 VL for now")
+
+        hidden_size = mm_handles[0]["tensor_size"][1]
+        num_deepstack_levels = len(self.config.vision_config.deepstack_visual_indexes)
+        # This is because, unlike previous Qwen VL models, the embeddings are concatenated with
+        # feature maps from deepstack layers.
+        expected_size = self.config.text_config.hidden_size * (1 + num_deepstack_levels)
+        if hidden_size != expected_size:
+            raise RuntimeError(
+                f"Expected multimodal embedding to have hidden size {expected_size}, got {hidden_size}."
+            )
+
+        input_ids = self.tokenizer(text_prompt, return_tensors="pt").input_ids[0]
+
+        # TODO: what about `video_token_id`?
+        image_token_index = self.config.image_token_id
+
+        image_mask = input_ids == image_token_index
+        image_positions = torch.where(image_mask)[0]
+        num_images = len(image_positions)
+        assert num_images == len(mm_handles), "Number of images must match number of mm_handles"
+        total_mm_tokens = sum(mm_handle["tensor_size"][0] for mm_handle in mm_handles)
+        final_length = len(input_ids) - num_images + total_mm_tokens
+        # Create output tensor
+        expanded_ids = torch.empty(final_length, dtype=input_ids.dtype)
+        placeholder_id = self.tllm_multimodal_token_id
+
+        # Fill the expanded sequence
+        write_pos = 0
+        image_cnt = 0
+        mm_token_length = []
+        mm_token_offsets = []
+        for read_pos in range(len(input_ids)):
+            if input_ids[read_pos] == image_token_index:
+                # Replace with placeholder id
+                mm_token_num = mm_handles[image_cnt]["tensor_size"][0]
+                expanded_ids[write_pos : write_pos + mm_token_num] = placeholder_id
+                mm_token_offsets.append(write_pos)
+                mm_token_length.append(mm_token_num)
+                write_pos += mm_token_num
+                image_cnt += 1
+            else:
+                # Copy text token as-is
+                expanded_ids[write_pos] = input_ids[read_pos]
+                write_pos += 1
+
+        assert write_pos == final_length, f"Write position mismatch: {write_pos} != {final_length}"
+        assert mm_token_length[-1] + mm_token_offsets[-1] <= final_length, (
+            f"mm_token_length[-1] + mm_token_offsets[-1] ({mm_token_length[-1] + mm_token_offsets[-1]}) should be less "
+            f"than or equal to final_length ({final_length})"
+        )
+        return expanded_ids.to(torch.int32).tolist(), mm_token_length, mm_token_offsets
+
 
 class Qwen3VLVisionAttention(Qwen2_5_VLVisionAttention):
     def __init__(self, model_config, layer_idx):
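Note: a worked example of the expansion performed by get_prompt_token_ids
above, with toy ids (the image token, placeholder id, and handle size are
made up for illustration):

    # Suppose the tokenized prompt is [12, 7, IMG, 9] with IMG == 151655,
    # the single mm handle reports tensor_size[0] == 4 MM tokens, and the
    # TRT-LLM placeholder id is 32000. The expansion then yields:
    #   expanded_ids     == [12, 7, 32000, 32000, 32000, 32000, 9]
    #   mm_token_length  == [4]
    #   mm_token_offsets == [2]
    # i.e. final_length == len(input_ids) - num_images + total_mm_tokens
    #                   == 4 - 1 + 4 == 7.
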
@@ -825,6 +905,7 @@ class Qwen3VLModelBase(PreTrainedModel):
             llm_model_config.pretrained_config.architectures = ["Qwen3MoeForCausalLM"]
         else:
             raise ValueError(f"Unsupported architecture: {self.original_arch}")
+        # Qwen3ForCausalLM.
         self.llm = AutoModelForCausalLM.from_config(llm_model_config)
 
         if not _is_disagg():
@@ -953,22 +1034,16 @@ class Qwen3VLModelBase(PreTrainedModel):
 
         # NOTE: Qwen*-VL series has mrope_config even on the text-only prompts,
         # so we need to separate the mm_multimodal_params from the text-only prompts.
-        mm_multimodal_params = [
-            multimodal_param
-            for multimodal_param in multimodal_params
-            if multimodal_param.multimodal_data.get("image", {}).get("pixel_values") is not None
-            or multimodal_param.multimodal_data.get("video", {}).get("pixel_values_videos")
-            is not None
-        ]
+        mm_multimodal_params = self._get_requests_with_mm_data(multimodal_params)
         if len(mm_multimodal_params) > 0:
             if not _is_disagg():
                 mm_embeds = get_multimodal_embeddings(
                     encoder_forward_fn=self.mm_encoder.forward,
                     multimodal_params=mm_multimodal_params,
                 )
-            else:
+            elif not getattr(self, "support_mm_disagg", False):
                 raise NotImplementedError(
-                    "Qwen3VLModel does not support disaggregated inference yet. Please unset "
+                    f"{type(self)} does not support disaggregated inference yet. Please unset "
                     "the TLLM_MULTIMODAL_DISAGGREGATED environment variable, or set it to '0'."
                 )
             mm_embeds = find_input_mm_embeds(mm_embeds, mm_multimodal_params)
@@ -1008,7 +1083,24 @@ class Qwen3VLModelBase(PreTrainedModel):
         logger.debug(f"output shape: {output_prob.shape}")
         return output_prob
 
+    def _get_requests_with_mm_data(self, multimodal_params):
+        mm_multimodal_params = []
+        for multimodal_param in multimodal_params:
+            data = multimodal_param.multimodal_data
+            if (
+                # The first 2 conditions check whether there is input on which inference should be run.
+                data.get("image", {}).get("pixel_values") is not None
+                or data.get("video", {}).get("pixel_values_videos") is not None
+                # This condition corresponds to when the embeddings are already populated, as is e.g.
+                # the case in EPD disagg in the prefill worker.
+                or data.get("multimodal_embedding")
+            ):
+                mm_multimodal_params.append(multimodal_param)
+        return mm_multimodal_params
+
 
+@support_multimodal_disaggregated
 @register_vision_encoder(Qwen3VisionModelBase, vlm_base_model=Qwen3VisionModel)
 @register_auto_model("Qwen3VLForConditionalGeneration")
 @register_input_processor(
diff --git a/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py b/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py
index 99154dd074..993559879b 100644
--- a/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py
+++ b/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py
@@ -21,10 +21,12 @@ example_images = [
 
 _LLAVA_DIR = llm_models_root() / "multimodals" / "llava-v1.6-mistral-7b-hf"
 _QWEN_2_5_VL_DIR = llm_models_root() / "Qwen2.5-VL-3B-Instruct"
+_QWEN_3_VL_DIR = llm_models_root() / "Qwen3" / "Qwen3-VL-2B-Instruct"
 
 
 # TODO: Add multi-image in single chat test
-@pytest.mark.parametrize("model_dir", [_LLAVA_DIR, _QWEN_2_5_VL_DIR])
+@pytest.mark.parametrize("model_dir",
+                         [_LLAVA_DIR, _QWEN_2_5_VL_DIR, _QWEN_3_VL_DIR])
 @pytest.mark.parametrize("pd_disagg", [False, True])
 def test_single_image_chat(model_dir, pd_disagg):
     """Test processing single image using encoder (pass mm_embeddings) + LLM API.
@@ -180,6 +182,7 @@ def test_single_image_chat(model_dir, pd_disagg):
         # Qwen2.5 VL's vision encoder seems to output different embeddings based on this value.
         # The test only passes with this set to 1.
         (_QWEN_2_5_VL_DIR, 1),
+        (_QWEN_3_VL_DIR, 3),
     ],
 )
 def test_multi_request_batch_chat(model_dir, encoder_max_batch_size):
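Note: with the _get_requests_with_mm_data refactor above, a request counts
as multimodal if it carries raw pixels (the encoder runs in-process) or an
already populated "multimodal_embedding" (EPD disagg, where the encoder ran
in a separate worker). A minimal sketch of that selection logic under those
assumptions (standalone function, not the TRT-LLM method):

    def has_mm_data(data: dict) -> bool:
        # Raw inputs that still need the vision encoder...
        if data.get("image", {}).get("pixel_values") is not None:
            return True
        if data.get("video", {}).get("pixel_values_videos") is not None:
            return True
        # ...or embeddings precomputed by a remote encoder worker.
        return bool(data.get("multimodal_embedding"))

    assert has_mm_data({"multimodal_embedding": [0.1]})
    assert not has_mm_data({"image": {}})
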
+ data.get("image", {}).get("pixel_values") is not None + or data.get("video", {}).get("pixel_values_videos") is not None + # This condition corresponds to when the embeddings are already populated, as is e.g. + # the case in EPD disagg in the prefill worker. + or data.get("multimodal_embedding") + ): + mm_multimodal_params.append(multimodal_param) + return mm_multimodal_params + + +@support_multimodal_disaggregated @register_vision_encoder(Qwen3VisionModelBase, vlm_base_model=Qwen3VisionModel) @register_auto_model("Qwen3VLForConditionalGeneration") @register_input_processor( diff --git a/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py b/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py index 99154dd074..993559879b 100644 --- a/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py +++ b/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py @@ -21,10 +21,12 @@ example_images = [ _LLAVA_DIR = llm_models_root() / "multimodals" / "llava-v1.6-mistral-7b-hf" _QWEN_2_5_VL_DIR = llm_models_root() / "Qwen2.5-VL-3B-Instruct" +_QWEN_3_VL_DIR = llm_models_root() / "Qwen3" / "Qwen3-VL-2B-Instruct" # TODO: Add multi-image in single chat test -@pytest.mark.parametrize("model_dir", [_LLAVA_DIR, _QWEN_2_5_VL_DIR]) +@pytest.mark.parametrize("model_dir", + [_LLAVA_DIR, _QWEN_2_5_VL_DIR, _QWEN_3_VL_DIR]) @pytest.mark.parametrize("pd_disagg", [False, True]) def test_single_image_chat(model_dir, pd_disagg): """Test processing single image using encoder (pass mm_embeddings) + LLM API. @@ -180,6 +182,7 @@ def test_single_image_chat(model_dir, pd_disagg): # Qwen2.5 VL's vision encoder seems to output different embeddings based on this value. # The test only passes with this set to 1. (_QWEN_2_5_VL_DIR, 1), + (_QWEN_3_VL_DIR, 3), ], ) def test_multi_request_batch_chat(model_dir, encoder_max_batch_size): From e0331297a620475fc41791a5906e09b453af7335 Mon Sep 17 00:00:00 2001 From: mpikulski <206748156+ixlmar@users.noreply.github.com> Date: Thu, 8 Jan 2026 12:47:39 +0100 Subject: [PATCH 16/16] [TRTLLM-9522][fix] broken cast (#9975) Signed-off-by: ixlmar <206748156+ixlmar@users.noreply.github.com> --- tensorrt_llm/llmapi/llm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorrt_llm/llmapi/llm.py b/tensorrt_llm/llmapi/llm.py index 6d3410bf3c..ac869d765a 100644 --- a/tensorrt_llm/llmapi/llm.py +++ b/tensorrt_llm/llmapi/llm.py @@ -491,8 +491,8 @@ class BaseLLM: elif 'multi_modal_embeddings' in inputs: mm_embedding_info = inputs['multi_modal_embeddings'] prompt_token_ids, extra_processed_inputs = cast( - self.input_processor, - BaseMultimodalInputProcessor).attach_multimodal_embeddings( + BaseMultimodalInputProcessor, + self.input_processor).attach_multimodal_embeddings( inputs, mm_embedding_info, sampling_params) else: with nvtx_range_debug("input_processor"):