diff --git a/tensorrt_llm/_torch/models/checkpoints/hf/qwen3_moe_weight_mapper.py b/tensorrt_llm/_torch/models/checkpoints/hf/qwen3_moe_weight_mapper.py
index 1c12b2bd5f..2ad24e129b 100644
--- a/tensorrt_llm/_torch/models/checkpoints/hf/qwen3_moe_weight_mapper.py
+++ b/tensorrt_llm/_torch/models/checkpoints/hf/qwen3_moe_weight_mapper.py
@@ -16,9 +16,6 @@ class Qwen3MoeHfWeightMapper(Qwen2MoeHfWeightMapper):
                                                   DecoderModelForCausalLM],
                               config: ModelConfig):
         super().init_model_and_config(model, config)
-        self._num_kv_heads = model.config.num_key_value_heads if hasattr(
-            model.config, 'num_key_value_heads'
-        ) and model.config.num_key_value_heads is not None else model.config.num_attention_heads
 
     def should_skip_module(self, module_name: str) -> bool:
         if module_name.startswith("draft_model"):
@@ -49,3 +46,11 @@ class Qwen3MoeHfWeightMapper(Qwen2MoeHfWeightMapper):
             return processed_weights
 
         return weights
+
+    @property
+    def _num_kv_heads(self) -> int:
+        num_kv_heads = self._model.config.num_key_value_heads if hasattr(
+            self._model.config, 'num_key_value_heads'
+        ) and self._model.config.num_key_value_heads is not None else self._model.config.num_attention_heads
+
+        return num_kv_heads
diff --git a/tensorrt_llm/_torch/models/checkpoints/hf/qwen3vl_moe_weight_mapper.py b/tensorrt_llm/_torch/models/checkpoints/hf/qwen3vl_moe_weight_mapper.py
index cb72762c5d..12bddd4da8 100644
--- a/tensorrt_llm/_torch/models/checkpoints/hf/qwen3vl_moe_weight_mapper.py
+++ b/tensorrt_llm/_torch/models/checkpoints/hf/qwen3vl_moe_weight_mapper.py
@@ -1,4 +1,8 @@
 from torch import nn
+from transformers.models.qwen3_vl_moe.configuration_qwen3_vl_moe import (
+    Qwen3VLMoeTextConfig,
+    Qwen3VLMoeVisionConfig,
+)
 
 from tensorrt_llm._torch.models.checkpoints.hf.qwen3_moe_weight_mapper import Qwen3MoeHfWeightMapper
 from tensorrt_llm._torch.models.modeling_utils import register_mapper
@@ -22,3 +26,20 @@ class Qwen3VLMoeHfWeightMapper(Qwen3MoeHfWeightMapper):
             module.load_weights(
                 weights=[updated_module_weights], allow_partial_loading=allow_partial_loading
             )
+
+    @property
+    def _num_kv_heads(self) -> int:
+        config = self._model.config
+        if isinstance(config, Qwen3VLMoeTextConfig):
+            num_kv_heads = getattr(config, "num_key_value_heads", None)
+            if num_kv_heads is None:
+                num_kv_heads = config.num_attention_heads
+        elif isinstance(config, Qwen3VLMoeVisionConfig):
+            num_kv_heads = config.num_heads
+        else:
+            raise TypeError(
+                "Expected `Qwen3VLMoeTextConfig` or `Qwen3VLMoeVisionConfig`, "
+                f"got {type(config).__name__}"
+            )
+
+        return num_kv_heads
diff --git a/tensorrt_llm/_torch/models/modeling_qwen3vl_moe.py b/tensorrt_llm/_torch/models/modeling_qwen3vl_moe.py
index a7a0050383..74cd16ec69 100644
--- a/tensorrt_llm/_torch/models/modeling_qwen3vl_moe.py
+++ b/tensorrt_llm/_torch/models/modeling_qwen3vl_moe.py
@@ -9,6 +9,7 @@ from ...inputs import (
     MultimodalPlaceholderMetadata,
     MultimodalPlaceholderPlacement,
     register_input_processor,
+    support_multimodal_disaggregated,
 )
 from .checkpoints.base_weight_mapper import BaseWeightMapper
 from .checkpoints.hf.qwen3vl_moe_weight_mapper import Qwen3VLMoeHfWeightMapper
@@ -21,6 +22,14 @@ from .modeling_qwen3vl import (
 )
 from .modeling_utils import ModelConfig, register_auto_model, register_vision_encoder
 
+# NOTE: this is technically not strictly necessary, since the underlying mechanism for registering
+# support is tacked onto the input processor class (`Qwen3VLInputProcessorBase`). Given that
+# the `Qwen3VLModel` (defined via the import of `modeling_qwen3vl.py` in this file) has that
+# decorator applied to it, and uses the same input processor class, we get it "for free" here.
+# However, we keep it here to explicitly signify intent that this is supported. This also shields
+# it from e.g. the input processor classes becoming specialized between `Qwen3VLModel` and the
+# below MoE class.
+@support_multimodal_disaggregated
 @register_vision_encoder(Qwen3VisionModelBase, vlm_base_model=Qwen3VisionModel)
 @register_auto_model("Qwen3VLMoeForConditionalGeneration")
 @register_input_processor(
diff --git a/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py b/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py
index 2352329ccd..4cc6e2e19d 100644
--- a/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py
+++ b/tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py
@@ -1,3 +1,4 @@
+import copy
 import json
 import os
 import time
@@ -11,9 +12,10 @@ from utils.llm_data import llm_models_root
 
 from tensorrt_llm import MultimodalEncoder
 from tensorrt_llm._torch.shared_tensor import SharedTensorContainer
+from tensorrt_llm._utils import get_sm_version
 from tensorrt_llm.inputs import default_multimodal_input_loader
 from tensorrt_llm.llmapi import (CacheTransceiverConfig, DisaggregatedParams,
-                                 KvCacheConfig)
+                                 KvCacheConfig, MoeConfig)
 from tensorrt_llm.llmapi.llm import LLM, SamplingParams
 
 test_data_root = Path(
@@ -27,6 +29,67 @@ example_images = [
 _LLAVA_DIR = llm_models_root() / "multimodals" / "llava-v1.6-mistral-7b-hf"
 _QWEN_2_5_VL_DIR = llm_models_root() / "Qwen2.5-VL-3B-Instruct"
 _QWEN_3_VL_DIR = llm_models_root() / "Qwen3" / "Qwen3-VL-2B-Instruct"
+_QWEN_3_VL_30B_A3B_FP8_DIR = llm_models_root(
+) / "Qwen3" / "Qwen3-VL-30B-A3B-Instruct-FP8"
+
+_FAKE_QWEN3_VL_30B_A3B_FP8_SENTINEL = "qwen3_vl_30b_a3b_fp8_fake"
+_FAKE_CHECKPOINT_MARKER = ".tllm_fake_checkpoint"
+
+
+# Unlike the other models, we cannot fit a multimodal encoder + 2 copies of the LLM on a single
+# H100 GPU in CI. We therefore resort to creating a slimmed down version of the model with less
+# layers.
+def _get_fake_qwen3_vl_30b_a3b_config() -> dict:
+    config_path = _QWEN_3_VL_30B_A3B_FP8_DIR / "config.json"
+    if not config_path.exists():
+        pytest.skip(f"Qwen3-VL-30B-A3B config not found: {config_path}")
+    with open(config_path, "r") as f:
+        config = json.load(f)
+    config = copy.deepcopy(config)
+    config["text_config"]["num_hidden_layers"] = 2
+    return config
+
+
+def _create_fake_qwen3_vl_30b_a3b_fp8_dir(
+    tmp_path_factory: pytest.TempPathFactory,
+    assets_dir: Path,
+) -> Path:
+    if not assets_dir.exists():
+        pytest.skip(f"Base model dir not found: {assets_dir}")
+
+    fake_dir = tmp_path_factory.mktemp("qwen3_vl_30b_a3b_fp8_fake")
+
+    for item in assets_dir.iterdir():
+        if item.name == "config.json":
+            continue
+        target = fake_dir / item.name
+        if target.exists():
+            continue
+        os.symlink(item, target, target_is_directory=item.is_dir())
+
+    config_path = fake_dir / "config.json"
+    with open(config_path, "w") as f:
+        json.dump(_get_fake_qwen3_vl_30b_a3b_config(), f, indent=2)
+
+    (fake_dir /
+     _FAKE_CHECKPOINT_MARKER).write_text("Synthetic checkpoint for CI tests.\n")
+    return fake_dir
+
+
+def _get_fake_checkpoint_kwargs(model_dir: Path) -> dict:
+    if (model_dir / _FAKE_CHECKPOINT_MARKER).exists():
+        return {"load_format": "dummy"}
+    return {}
+
+
+def _is_fake_checkpoint(model_dir: Path) -> bool:
+    return (model_dir / _FAKE_CHECKPOINT_MARKER).exists()
+
+
+def _get_moe_config_for_blackwell() -> MoeConfig:
+    if get_sm_version() >= 100:
+        return MoeConfig(backend="DEEPGEMM")
+    return MoeConfig()
 
 
 @pytest.mark.parametrize(
@@ -67,10 +130,12 @@ def test_kv_event_mm_keys_with_reuse(prompts, expected_num_duplicates):
         free_gpu_memory_fraction=free_gpu_memory_fraction,
         event_buffer_max_size=1024,  # Enable KV cache events
     )
+    moe_config = _get_moe_config_for_blackwell()
 
     llm = LLM(model=encoder_model_dir,
               backend='pytorch',
               kv_cache_config=kv_cache_config,
+              moe_config=moe_config,
              max_batch_size=1)
 
     inputs = _load_inputs(llm, prompts, media)
@@ -100,10 +165,20 @@ def test_kv_event_mm_keys_with_reuse(prompts, expected_num_duplicates):
         f"got {num_duplicates}. Offsets: {mm_keys_offsets}")
 
 
-@pytest.fixture(scope="module",
-                params=[_LLAVA_DIR, _QWEN_2_5_VL_DIR, _QWEN_3_VL_DIR],
-                ids=["llava_7b", "qwen2.5_3b", "qwen3_2b"])
-def model_dir(request) -> Path:
+@pytest.fixture(
+    scope="module",
+    params=[
+        pytest.param(_LLAVA_DIR, id="llava_7b"),
+        pytest.param(_QWEN_2_5_VL_DIR, id="qwen2.5_3b"),
+        pytest.param(_QWEN_3_VL_DIR, id="qwen3_2b"),
+        pytest.param(_FAKE_QWEN3_VL_30B_A3B_FP8_SENTINEL,
+                     id="qwen3_30b_a3b_fp8"),
+    ],
+)
+def model_dir(request, tmp_path_factory: pytest.TempPathFactory) -> Path:
+    if request.param == _FAKE_QWEN3_VL_30B_A3B_FP8_SENTINEL:
+        return _create_fake_qwen3_vl_30b_a3b_fp8_dir(tmp_path_factory,
+                                                     _QWEN_3_VL_DIR)
     return request.param
 
 
@@ -125,14 +200,18 @@ def llms(model_dir: Path,
         free_gpu_memory_fraction=free_gpu_memory_fraction,
     )
 
+    load_kwargs = _get_fake_checkpoint_kwargs(model_dir)
+    moe_config = _get_moe_config_for_blackwell()
     llm = LLM(
         model=model_dir,
         backend='pytorch',
         kv_cache_config=kv_cache_config,
+        moe_config=moe_config,
         trust_remote_code=True,
         cache_transceiver_config=cache_transceiver_cfg,
         disable_overlap_scheduler=disable_overlap_scheduler,
         max_batch_size=1,  # fix batch size to reduce non-determinism in tests
+        **load_kwargs,
     )
     with llm:
         if pd_disagg:
@@ -140,8 +219,10 @@ def llms(model_dir: Path,
                 model=model_dir,
                 backend='pytorch',
                 kv_cache_config=kv_cache_config,
+                moe_config=moe_config,
                 trust_remote_code=True,
                 cache_transceiver_config=cache_transceiver_cfg,
+                **load_kwargs,
             )
             with llm_decode:
                 yield (llm, llm_decode)
@@ -252,7 +333,9 @@ def test_single_image_chat(
     # Prepare inputs for llm (pass mm_embeddings)
 
     # Process multimodal data using encoder (pass mm_embeddings)
-    encoder = MultimodalEncoder(model=model_dir, max_batch_size=max_batch_size)
+    encoder = MultimodalEncoder(model=model_dir,
+                                max_batch_size=max_batch_size,
+                                **_get_fake_checkpoint_kwargs(model_dir))
     with encoder:
         encoder_outputs = encoder.generate(inputs)
 
@@ -393,13 +476,15 @@ def test_multi_request_batch_chat(
     embeddings alongside the prompt ("multi_modal_embeddings"), as well as the embedding
     handling within default_multimodal_input_loader.
     """
-    if use_mm_embeddings and model_dir in [_QWEN_2_5_VL_DIR, _QWEN_3_VL_DIR]:
+    if use_mm_embeddings and (model_dir in [_QWEN_2_5_VL_DIR, _QWEN_3_VL_DIR]
+                              or _is_fake_checkpoint(model_dir)):
         pytest.skip("Qwen does not implement attach_multimodal_embeddings")
 
     # Qwen2.5/3 VL's vision encoder seems to output different embeddings based on this value.
     # The test only passes with this set to 1.
-    encoder_max_batch_size = (1 if model_dir
-                              in [_QWEN_2_5_VL_DIR, _QWEN_3_VL_DIR] else 3)
+    encoder_max_batch_size = (1 if
+                              model_dir in [_QWEN_2_5_VL_DIR, _QWEN_3_VL_DIR]
+                              or _is_fake_checkpoint(model_dir) else 3)
 
     llm, llm_decode = llms
     if llm_decode is not None:
@@ -430,7 +515,8 @@ def test_multi_request_batch_chat(
         ) > 0, f"Reference generation has no output text for input {i}"
 
     encoder = MultimodalEncoder(model=model_dir,
-                                max_batch_size=encoder_max_batch_size)
+                                max_batch_size=encoder_max_batch_size,
+                                **_get_fake_checkpoint_kwargs(model_dir))
     with encoder:
         # Encoder path
         encoder_outputs = encoder.generate(inputs)