diff --git a/tests/integration/defs/accuracy/references/mmmu.yaml b/tests/integration/defs/accuracy/references/mmmu.yaml index b6e8cb1321..a46c77ec2e 100644 --- a/tests/integration/defs/accuracy/references/mmmu.yaml +++ b/tests/integration/defs/accuracy/references/mmmu.yaml @@ -3,10 +3,17 @@ google/gemma-3-27b-it: - quant_algo: FP8 kv_cache_quant_algo: FP8 accuracy: 50.0 + - quant_algo: NVFP4 + kv_cache_quant_algo: FP8 + accuracy: 48.0 Qwen/Qwen2-VL-7B-Instruct: - accuracy: 48.44 Qwen/Qwen2.5-VL-7B-Instruct: - accuracy: 51.22 + - quant_algo: FP8 + accuracy: 45.44 + - quant_algo: NVFP4 + accuracy: 40.67 nvidia/Nano-v2-VLM: - accuracy: 43.78 llava-hf/llava-v1.6-mistral-7b-hf: diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch_multimodal.py b/tests/integration/defs/accuracy/test_llm_api_pytorch_multimodal.py index 399fa66a4f..ef8b0ad6e8 100644 --- a/tests/integration/defs/accuracy/test_llm_api_pytorch_multimodal.py +++ b/tests/integration/defs/accuracy/test_llm_api_pytorch_multimodal.py @@ -2,8 +2,9 @@ import pytest from tensorrt_llm import LLM from tensorrt_llm.llmapi import CudaGraphConfig, KvCacheConfig, MoeConfig, SamplingParams +from tensorrt_llm.quantization import QuantAlgo -from ..conftest import llm_models_root, skip_post_blackwell, skip_pre_blackwell, skip_pre_hopper +from ..conftest import llm_models_root, skip_pre_blackwell, skip_pre_hopper from .accuracy_core import MMMU, LlmapiAccuracyTestHarness @@ -54,6 +55,30 @@ class TestQwen2_5_VL_7B(LlmapiAccuracyTestHarness): task = MMMU(self.MODEL_NAME) task.evaluate(llm, sampling_params=self.sampling_params) + @skip_pre_hopper + def test_fp8(self): + model_path = f"{llm_models_root()}/multimodals/Qwen2.5-VL-7B-Instruct-FP8" + with LLM( + model_path, + max_num_tokens=self.MAX_NUM_TOKENS, + kv_cache_config=self.kv_cache_config, + ) as llm: + assert llm.args.quant_config.quant_algo == QuantAlgo.FP8 + task = MMMU(self.MODEL_NAME) + task.evaluate(llm, sampling_params=self.sampling_params) + + @skip_pre_blackwell + def test_nvfp4(self): + model_path = f"{llm_models_root()}/multimodals/Qwen2.5-VL-7B-Instruct-FP4" + with LLM( + model_path, + max_num_tokens=self.MAX_NUM_TOKENS, + kv_cache_config=self.kv_cache_config, + ) as llm: + assert llm.args.quant_config.quant_algo == QuantAlgo.NVFP4 + task = MMMU(self.MODEL_NAME) + task.evaluate(llm, sampling_params=self.sampling_params) + class TestNano_V2_VLM(LlmapiAccuracyTestHarness): MODEL_NAME = "nvidia/Nano-v2-VLM" @@ -217,7 +242,6 @@ class TestPhi4MMFusedVisionLora(LlmapiAccuracyTestHarness): @skip_pre_hopper -@skip_post_blackwell class TestGemma3_27BInstruct(LlmapiAccuracyTestHarness): MODEL_NAME = "google/gemma-3-27b-it" # Note: This has only the LLM part quantized. Vision part is in bfloat16. @@ -236,17 +260,28 @@ class TestGemma3_27BInstruct(LlmapiAccuracyTestHarness): dtype="fp8", ) - def test_fp8_prequantized(self): + def _make_llm(self, model_path: str): # Gemma3 VLM needs FlashInfer attention backend for custom mask support. - with LLM( - self.MODEL_PATH, + return LLM( + model_path, max_batch_size=16, max_num_tokens=self.MAX_NUM_TOKENS, max_seq_len=8704, # 8192 + 512. 
kv_cache_config=self.kv_cache_config, attn_backend="FLASHINFER", enable_chunked_prefill=False, - ) as llm: + ) + + def test_fp8_prequantized(self): + with self._make_llm(self.MODEL_PATH) as llm: + task = MMMU(self.MODEL_NAME) + task.evaluate(llm, sampling_params=self.sampling_params) + + @skip_pre_blackwell + def test_nvfp4_prequantized(self): + model_path = f"{llm_models_root()}/gemma/gemma-3-27b-it-FP4" + with self._make_llm(model_path) as llm: + assert llm.args.quant_config.quant_algo == QuantAlgo.NVFP4 task = MMMU(self.MODEL_NAME) task.evaluate(llm, sampling_params=self.sampling_params) diff --git a/tests/integration/defs/perf/pytorch_model_config.py b/tests/integration/defs/perf/pytorch_model_config.py index baf05df2fa..1b88bc524b 100644 --- a/tests/integration/defs/perf/pytorch_model_config.py +++ b/tests/integration/defs/perf/pytorch_model_config.py @@ -17,6 +17,8 @@ Model pytorch/TRT yaml config for trtllm-bench perf tests """ +from ..conftest import llm_models_root + def recursive_update(d, u): for k, v in u.items(): @@ -295,6 +297,32 @@ def get_model_yaml_config(model_label: str, 'num_postprocess_workers': 4 } }, + # GPT-OSS 120B speculative decoding (Eagle3 draft) + { + 'patterns': [ + 'gpt_oss_120b_fp4-bench-pytorch-streaming-float4-maxbs:1-maxnt:4096-input_output_len:2048,128-reqs:1-con:1', + ], + 'config': { + 'enable_attention_dp': False, + 'disable_overlap_scheduler': False, + 'enable_autotuner': False, + 'enable_chunked_prefill': True, + 'cuda_graph_config': { + 'enable_padding': True, + }, + 'speculative_config': { + 'decoding_type': + 'Eagle', + 'max_draft_len': + 5, + 'speculative_model_dir': + f"{llm_models_root()}/gpt_oss/gpt-oss-120b-Eagle3", + }, + 'kv_cache_config': { + 'enable_block_reuse': False, + }, + } + }, # Phi-4-multimodal-instruct with chunked prefill and kv_cache_reuse { 'patterns': [ diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py index a4ed2b60de..d70d1c9f40 100644 --- a/tests/integration/defs/perf/test_perf.py +++ b/tests/integration/defs/perf/test_perf.py @@ -169,6 +169,7 @@ MODEL_PATH_DICT = { "mistral_small_v3.1_24b": "Mistral-Small-3.1-24B-Instruct-2503", "gpt_oss_120b_fp4": "gpt_oss/gpt-oss-120b", "gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b", + "gpt_oss_120b_eagle3": "gpt_oss/gpt-oss-120b-Eagle3", "nemotron_nano_3_30b_fp8": "Nemotron-Nano-3-30B-A3.5B-FP8-KVFP8-dev", "nemotron_nano_12b_v2": "NVIDIA-Nemotron-Nano-12B-v2", "nvidia_nemotron_nano_9b_v2_nvfp4": "NVIDIA-Nemotron-Nano-9B-v2-NVFP4", diff --git a/tests/integration/test_lists/qa/llm_spark_core.txt b/tests/integration/test_lists/qa/llm_spark_core.txt index 2da9bbb00d..54f177b68b 100644 --- a/tests/integration/test_lists/qa/llm_spark_core.txt +++ b/tests/integration/test_lists/qa/llm_spark_core.txt @@ -37,3 +37,5 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutl accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_fp8 accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_fp4 accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype + +test_e2e.py::test_trtllm_benchmark_serving[gpt_oss/gpt-oss-20b] diff --git a/tests/integration/test_lists/qa/llm_spark_func.txt b/tests/integration/test_lists/qa/llm_spark_func.txt index 05a2e5e1b7..fade1ddf59 100644 --- a/tests/integration/test_lists/qa/llm_spark_func.txt +++ b/tests/integration/test_lists/qa/llm_spark_func.txt @@ -37,8 +37,38 @@ test_e2e.py::test_ptp_quickstart_advanced_eagle3[GPT-OSS-120B-Eagle3-gpt_oss/gpt 
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_auto_dtype
+accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_fp8
+accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_nvfp4
+accuracy/test_llm_api_pytorch_multimodal.py::TestGemma3_27BInstruct::test_fp8_prequantized
+accuracy/test_llm_api_pytorch_multimodal.py::TestGemma3_27BInstruct::test_nvfp4_prequantized
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=False]
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=True]
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_fp8
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_fp4
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
+
+test_e2e.py::test_trtllm_benchmark_serving[gpt_oss/gpt-oss-20b]
+test_e2e.py::test_openai_health
+test_e2e.py::test_openai_chat_guided_decoding
+test_e2e.py::test_trtllm_multimodal_benchmark_serving
+test_e2e.py::test_openai_completions_example[pytorch]
+test_e2e.py::test_openai_reasoning[pytorch]
+test_e2e.py::test_openai_chat_harmony
+test_e2e.py::test_trtllm_benchmark_serving[llama-3.1-model/Meta-Llama-3.1-8B]
+
+examples/serve/test_serve_negative.py::test_invalid_max_tokens
+examples/serve/test_serve_negative.py::test_invalid_temperature
+examples/serve/test_serve_negative.py::test_invalid_top_p[-0.1]
+examples/serve/test_serve_negative.py::test_invalid_top_p[1.1]
+examples/serve/test_serve_negative.py::test_empty_messages_array
+examples/serve/test_serve_negative.py::test_missing_message_role
+examples/serve/test_serve_negative.py::test_invalid_token_ids
+examples/serve/test_serve_negative.py::test_extremely_large_token_id
+examples/serve/test_serve_negative.py::test_server_stability_under_invalid_requests
+examples/serve/test_serve_negative.py::test_concurrent_invalid_requests
+examples/serve/test_serve_negative.py::test_mixed_valid_invalid_requests
+examples/serve/test_serve_negative.py::test_health_check_during_errors
+examples/serve/test_serve_negative.py::test_request_exceeds_context_length
+examples/serve/test_serve_negative.py::test_malformed_json_request
+examples/serve/test_serve_negative.py::test_missing_content_type_header
+examples/serve/test_serve_negative.py::test_extremely_large_batch
diff --git a/tests/integration/test_lists/qa/llm_spark_perf.yml b/tests/integration/test_lists/qa/llm_spark_perf.yml
index 8447702dff..d9f6755179 100644
--- a/tests/integration/test_lists/qa/llm_spark_perf.yml
+++ b/tests/integration/test_lists/qa/llm_spark_perf.yml
@@ -7,7 +7,10 @@ llm_spark_perf:
      lte: 1
  tests:
  - perf/test_perf.py::test_perf[gpt_oss_20b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+  # GPT-OSS 120B normal case (no spec dec)
  - perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
+  # GPT-OSS 120B spec dec case (Eagle3)
+  - perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-streaming-float4-maxbs:1-maxnt:4096-input_output_len:2048,128-reqs:1-con:1]
  - perf/test_perf.py::test_perf[nvidia_nemotron_nano_9b_v2_nvfp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
  - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
  - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
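
For context, the new gpt_oss_120b_fp4 perf entry with maxnt:4096 is driven by the speculative-decoding config dict added to pytorch_model_config.py above. The sketch below is a rough, hand-written rendering of that dict as a standalone extra LLM-API options YAML of the kind trtllm-bench can consume; it is not part of the patch, and the speculative_model_dir value is a placeholder for the gpt-oss-120b-Eagle3 checkpoint under llm_models_root().

# Sketch only, not part of the patch: mirrors the perf-test config dict above.
# speculative_model_dir is a placeholder path.
enable_attention_dp: false
disable_overlap_scheduler: false
enable_autotuner: false
enable_chunked_prefill: true
cuda_graph_config:
  enable_padding: true
speculative_config:
  decoding_type: Eagle
  max_draft_len: 5
  speculative_model_dir: /models/gpt_oss/gpt-oss-120b-Eagle3
kv_cache_config:
  enable_block_reuse: false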