diff --git a/benchmarks/cpp/utils/prepare_real_data.py b/benchmarks/cpp/utils/prepare_real_data.py
index e15ef9267e..4441c57ee4 100644
--- a/benchmarks/cpp/utils/prepare_real_data.py
+++ b/benchmarks/cpp/utils/prepare_real_data.py
@@ -2,10 +2,11 @@ import logging
 import random
 import re
 import tempfile
+from pathlib import Path
 from typing import Optional
 
 import click
-from datasets import load_dataset
+import datasets
 from PIL import Image
 from pydantic import BaseModel, model_validator
 from utils.utils import (get_norm_dist_lengths, multimodal_dataset_dump,
@@ -29,7 +30,7 @@ def validate_output_len_dist(ctx, param, value):
 class DatasetConfig(BaseModel):
     """Dataset configurations."""
     """Name of the dataset on HuggingFace."""
-    name: str
+    name: Optional[str] = None
     """Config name of the dataset if existing."""
     config_name: Optional[str] = None
     """Split of the dataset. Typical values: train, validation, test. Setting to None will include all splits."""
@@ -44,6 +45,8 @@ class DatasetConfig(BaseModel):
     prompt: Optional[str] = None
     """The dataset dictionary key used to derive the output sequence length. Set to None if the dataset does not have a key for output."""
     output_key: Optional[str]
+    """The local path to the dataset to be loaded when using a local cache."""
+    local_path: Optional[str] = None
 
     @model_validator(mode='after')
     def check_prompt(self) -> 'DatasetConfig':
@@ -54,19 +57,40 @@ class DatasetConfig(BaseModel):
             raise AssertionError("Either --prompt-key or --prompt must be set.")
         return self
 
+    @model_validator(mode='after')
+    def check_name_and_local_path(self) -> 'DatasetConfig':
+        if self.name and self.local_path:
+            raise AssertionError(
+                "--dataset-name and --dataset-local-path cannot be set at the same time."
+            )
+        if (not self.name) and (not self.local_path):
+            raise AssertionError(
+                "Either --dataset-name or --dataset-local-path must be set.")
+        return self
+
     @property
     def query(self):
         """Generate the query for HuggingFace `datasets.load_dataset()`"""
+        first_arg = self.local_path if self.local_path else self.name
+
         if self.config_name:
-            return [self.name, self.config_name]
+            return [first_arg, self.config_name]
         else:
-            return [self.name]
+            return [first_arg]
+
+    @property
+    def display_name(self) -> str:
+        """Returns a human-readable identifier for error messages."""
+        # model_validator ensures exactly one of name or local_path is set
+        if self.name is not None:
+            return self.name
+        return self.local_path
 
     def get_prompt(self, req):
         """Get the prompt sentence from the given request."""
         if self.prompt_key:
             assert self.prompt_key in req, (
-                f"Dataset {self.name} does not have key '{self.prompt_key}'. "
+                f"Dataset {self.display_name} does not have key '{self.prompt_key}'. "
                 "Please set --prompt-key to one of the available keys: "
                 f"{req.keys()}")
             return req[self.prompt_key]
@@ -76,7 +100,7 @@ class DatasetConfig(BaseModel):
     def get_input(self, req):
         """Get the input sentence from the given request."""
         assert self.input_key in req, (
-            f"Dataset {self.name} does not have key '{self.input_key}'. "
+            f"Dataset {self.display_name} does not have key '{self.input_key}'. "
             "Please set --input-key to one of the available keys: "
             f"{req.keys()}")
         return req[self.input_key]
@@ -86,7 +110,7 @@ class DatasetConfig(BaseModel):
         image_keys = [self.image_key
                       ] + [f"{self.image_key}_{i}" for i in range(1, 8)]
         assert any(key in req for key in image_keys), (
-            f"Dataset {self.name} does not have key '{self.image_key}'. "
+            f"Dataset {self.display_name} does not have key '{self.image_key}'. "
             "Please set --dataset-image-key to one of the available keys: "
             f"{req.keys()}")
         images = []
@@ -101,16 +125,47 @@ class DatasetConfig(BaseModel):
             raise RuntimeError(
                 "--output-key is not set. Please either:\n"
                 "1. Define output length through --output-len-dist.\n"
-                f"2. If the dataset {self.name} has key for golden output and "
+                f"2. If the dataset {self.display_name} has key for golden output and "
                 "you wish to set output length to the length of the golden "
                 "output, set --output-key.")
         assert self.output_key in req, (
-            f"Dataset {self.name} does not have key '{self.output_key}'. "
+            f"Dataset {self.display_name} does not have key '{self.output_key}'. "
            "Please set --output-key to one of the available keys: "
             f"{req.keys()}")
         return req[self.output_key]
 
 
+def _create_dataset_load_error(e: ValueError) -> ValueError:
+    """Create a more informative ValueError from a dataset loading error.
+
+    Args:
+        e: The original ValueError from datasets.load_dataset().
+    Returns:
+        A new ValueError with additional context.
+    """
+    error_msg = str(e)
+    if "Config" in error_msg:
+        error_msg += "\n Please add the config name to the dataset config yaml."
+    elif "split" in error_msg:
+        error_msg += "\n Please specify supported split in the dataset config yaml."
+    return ValueError(error_msg)
+
+
+def load_dataset(dataset_config: DatasetConfig):
+    """Load dataset from local path or HuggingFace.
+    Args:
+        dataset_config: A `DatasetConfig` object that defines the dataset to load.
+    Returns:
+        Dataset iterator.
+    Raises:
+        ValueError: When dataset loading fails due to incorrect dataset config setting.
+    """
+    if dataset_config.local_path:
+        return load_dataset_from_local(dataset_config)
+    else:
+        return load_dataset_from_hf(dataset_config)
+
+
 def load_dataset_from_hf(dataset_config: DatasetConfig):
     """Load dataset from HuggingFace.
 
@@ -121,55 +176,117 @@ def load_dataset_from_hf(dataset_config: DatasetConfig):
     Raises:
         ValueError: When dataset loading fails due to incorrect dataset config setting.
     """
+    logging.debug(
+        f"Loading dataset from HF: query={dataset_config.query}, split={dataset_config.split}"
+    )
+
     try:
         dataset = iter(
-            load_dataset(*dataset_config.query,
-                         split=dataset_config.split,
-                         streaming=True,
-                         trust_remote_code=True))
+            datasets.load_dataset(*dataset_config.query,
+                                  split=dataset_config.split,
+                                  streaming=True,
+                                  trust_remote_code=True))
     except ValueError as e:
-        if "Config" in e:
-            e += "\n Please add the config name to the dataset config yaml."
-        elif "split" in e:
-            e += "\n Please specify supported split in the dataset config yaml."
-        raise ValueError(e)
+        raise _create_dataset_load_error(e)
+
+    logging.debug("Finished loading HF dataset")
 
     return dataset
 
 
+def load_dataset_from_local(dataset_config: DatasetConfig):
+    """Load dataset from local path.
+
+    Args:
+        dataset_config: A `DatasetConfig` object that defines the dataset to load.
+    Returns:
+        Dataset iterator.
+    Raises:
+        FileNotFoundError: When local dataset path does not exist.
+        ValueError: When dataset loading fails due to incorrect dataset config setting.
+    """
+
+    local_path = Path(dataset_config.local_path)
+
+    if not local_path.exists():
+        raise FileNotFoundError(
+            f"Local dataset path {local_path} does not exist.")
+
+    logging.debug(
+        f"Loading dataset from local path: path={local_path}, query={dataset_config.query}, split={dataset_config.split}"
+    )
+
+    # If it's a directory we can use the normal loader, otherwise custom loader
+    # depends on the file extension
+    if local_path.is_dir():
+        try:
+            dataset = datasets.load_dataset(*dataset_config.query,
+                                            split=dataset_config.split,
+                                            trust_remote_code=True)
+        except ValueError as e:
+            raise _create_dataset_load_error(e)
+    else:
+        format_map = {
+            ".json": "json",
+            ".jsonl": "json",
+            ".csv": "csv",
+            ".parquet": "parquet",
+        }
+
+        file_extension = local_path.suffix
+        dataset_type = format_map.get(file_extension)
+
+        if dataset_type is None:
+            raise ValueError(f"Unsupported file extension: {file_extension}")
+
+        try:
+            dataset = datasets.load_dataset(dataset_type,
+                                            data_files=str(local_path),
+                                            split=dataset_config.split)
+        except ValueError as e:
+            raise _create_dataset_load_error(e)
+
+    logging.debug("Finished loading local dataset")
+
+    return iter(dataset)
+
+
 @click.command()
-@click.option("--dataset-name",
-              required=True,
-              type=str,
-              help=f"Dataset name in HuggingFace.")
+@click.option("--dataset-name", type=str, help="Dataset name in HuggingFace.")
 @click.option("--dataset-config-name",
               type=str,
               default=None,
-              help=f"Dataset config name in HuggingFace (if exists).")
+              help="Dataset config name in HuggingFace (if exists).")
 @click.option("--dataset-split",
               type=str,
               required=True,
-              help=f"Split of the dataset to use.")
+              help="Split of the dataset to use.")
 @click.option("--dataset-input-key",
               type=str,
-              help=f"The dataset dictionary key for input.")
+              help="The dataset dictionary key for input.")
 @click.option("--dataset-image-key",
               type=str,
               default="image",
-              help=f"The dataset dictionary key for images.")
+              help="The dataset dictionary key for images.")
 @click.option("--dataset-prompt-key",
               type=str,
               default=None,
-              help=f"The dataset dictionary key for prompt (if exists).")
+              help="The dataset dictionary key for prompt (if exists).")
+@click.option(
+    "--dataset-local-path",
+    type=str,
+    default=None,
+    help=
+    "The local path to the dataset to be loaded when using an offline cache.")
 @click.option(
     "--dataset-prompt",
     type=str,
     default=None,
-    help=f"The prompt string when there is no prompt key for the dataset.")
+    help="The prompt string when there is no prompt key for the dataset.")
 @click.option("--dataset-output-key",
               type=str,
               default=None,
-              help=f"The dataset dictionary key for output (if exists).")
+              help="The dataset dictionary key for output (if exists).")
 @click.option(
     "--num-requests",
     type=int,
@@ -208,7 +325,7 @@ def dataset(root_args, **kwargs):
     modality = None
     multimodal_texts = []
     multimodal_image_paths = []
-    for req in load_dataset_from_hf(dataset_config):
+    for req in load_dataset(dataset_config):
         if any(key in req for key in ['image', 'image_1', 'video']):
             # multimodal input
             if 'video' in req and req['video'] is not None:
diff --git a/tests/integration/defs/cpp/test_e2e.py b/tests/integration/defs/cpp/test_e2e.py
index 5a90df6e08..ce583671cf 100644
--- a/tests/integration/defs/cpp/test_e2e.py
+++ b/tests/integration/defs/cpp/test_e2e.py
@@ -2,12 +2,81 @@ import copy
 import logging as _logger
 import os as _os
 import pathlib as _pl
-from typing import List
+from dataclasses import dataclass
+from typing import List, Optional
 
 import defs.cpp.cpp_common as _cpp
 import pytest
 
 
+@dataclass(frozen=True)
+class DatasetConfig:
+    """Configuration for a benchmark dataset."""
+    name: str
+    local_path: str
+    split: str
+    input_key: str
+    output_key: str
+    max_input_len: str
+    num_requests: str
+    config_name: Optional[str] = None
+    prompt: Optional[str] = None
+    prompt_key: Optional[str] = None
+
+    @property
+    def token_file(self) -> str:
+        return "prepared_" + self.name.replace('/', '_')
+
+    def get_dataset_args(self) -> dict[str, str]:
+        """Build the dataset args dict for prepare_dataset.py."""
+        args = {
+            '--dataset-local-path': self.local_path,
+            '--dataset-split': self.split,
+            '--dataset-input-key': self.input_key,
+            '--dataset-output-key': self.output_key,
+        }
+        if self.config_name:
+            args['--dataset-config-name'] = self.config_name
+        if self.prompt:
+            args['--dataset-prompt'] = self.prompt
+        if self.prompt_key:
+            args['--dataset-prompt-key'] = self.prompt_key
+        return args
+
+
+def get_benchmark_dataset_configs(model_cache: str) -> List[DatasetConfig]:
+    """Define dataset configurations for benchmark tests.
+
+    To add a new dataset, add a new DatasetConfig entry to this list.
+    """
+    datasets_dir = _pl.Path(model_cache) / "datasets"
+
+    return [
+        DatasetConfig(
+            name="ccdv/cnn_dailymail",
+            local_path=str(datasets_dir / "ccdv" / "cnn_dailymail"),
+            config_name="3.0.0",
+            split="validation",
+            input_key="article",
+            prompt="Summarize the following article:",
+            output_key="highlights",
+            max_input_len="256",
+            num_requests="50",
+        ),
+        DatasetConfig(
+            name="Open-Orca/1million-gpt-4",
+            local_path=str(datasets_dir / "Open-Orca" / "1million-gpt-4" /
+                           "1M-GPT4-Augmented.parquet"),
+            split="train",
+            input_key="question",
+            prompt_key="system_prompt",
+            output_key="response",
+            max_input_len="20",
+            num_requests="10",
+        ),
+    ]
+
+
 def run_single_gpu_tests(build_dir: _pl.Path,
                          test_list: List[str],
                          run_fp8=False,
@@ -93,27 +162,6 @@ def run_benchmarks(
         )
         return NotImplementedError
 
-    prompt_datasets_args = [{
-        '--dataset-name': "cnn_dailymail",
-        '--dataset-config-name': "3.0.0",
-        '--dataset-split': "validation",
-        '--dataset-input-key': "article",
-        '--dataset-prompt': "Summarize the following article:",
-        '--dataset-output-key': "highlights"
-    }, {
-        '--dataset-name': "Open-Orca/1million-gpt-4",
-        '--dataset-split': "train",
-        '--dataset-input-key': "question",
-        '--dataset-prompt-key': "system_prompt",
-        '--dataset-output-key': "response"
-    }]
-    token_files = [
-        "prepared_" + s['--dataset-name'].replace('/', '_')
-        for s in prompt_datasets_args
-    ]
-    max_input_lens = ["256", "20"]
-    num_reqs = ["50", "10"]
-
     if model_name == "gpt":
         model_engine_path = model_engine_dir / "fp16_plugin_packed_paged" / "tp1-pp1-cp1-gpu"
 
@@ -127,27 +175,25 @@ def run_benchmarks(
     # model_engine_path = model_engine_dir / model_spec_obj.get_model_path(
     # ) / "tp1-pp1-cp1-gpu"
 
-    for prompt_ds_args, tokens_f, len, num_req in zip(prompt_datasets_args,
-                                                      token_files,
-                                                      max_input_lens, num_reqs):
-
+    for config in get_benchmark_dataset_configs(model_cache):
         benchmark_src_dir = _pl.Path("benchmarks") / "cpp"
         data_dir = resources_dir / "data"
         prepare_dataset = [
             python_exe,
             str(benchmark_src_dir / "prepare_dataset.py"), "--tokenizer",
             str(tokenizer_dir), "--output",
-            str(data_dir / tokens_f), "dataset", "--max-input-len", len,
-            "--num-requests", num_req
+            str(data_dir / config.token_file), "dataset", "--max-input-len",
+            config.max_input_len, "--num-requests", config.num_requests
         ]
-        for k, v in prompt_ds_args.items():
+        for k, v in config.get_dataset_args().items():
             prepare_dataset += [k, v]
-
-        # https://nvbugs/4658787
-        # WAR before the prepare dataset can use offline cached dataset
+
+        # Use environment variable to force HuggingFace to use offline cached dataset
+        offline_env = {**_os.environ, 'HF_DATASETS_OFFLINE': '1'}
         _cpp.run_command(prepare_dataset,
                          cwd=root_dir,
                          timeout=300,
-                         env={'HF_DATASETS_OFFLINE': '0'})
+                         env=offline_env)
 
         for batching_type in batching_types:
             for api_type in api_types:
@@ -157,7 +203,7 @@
                     str(model_engine_path), "--type",
                     str(batching_type), "--api",
                     str(api_type), "--dataset",
-                    str(data_dir / tokens_f)
+                    str(data_dir / config.token_file)
                 ]
                 if model_name == "enc_dec":
                     benchmark += [
@@ -175,12 +221,13 @@
                                  cwd=root_dir,
                                  timeout=600)
 
-        if "IFB" in batching_type and "executor" in api_types:
+        if "IFB" in batching_types and "executor" in api_types:
            # executor streaming test
            benchmark = [
                str(benchmark_exe_dir / "gptManagerBenchmark"), "--engine_dir",
                str(model_engine_path), "--type", "IFB", "--dataset",
-                str(data_dir / tokens_f), "--api", "executor", "--streaming"
+                str(data_dir / config.token_file), "--api", "executor",
+                "--streaming"
            ]
            if model_name == "enc_dec":
                benchmark += [
@@ -263,7 +310,6 @@ def test_model(build_google_tests, model, prepare_model, run_model_tests,
     run_model_tests(model, run_fp8)
 
 
-@pytest.mark.skip(reason="https://nvbugs/5601670")
 @pytest.mark.parametrize("build_google_tests", ["80", "86", "89", "90"],
                          indirect=True)
 @pytest.mark.parametrize("model", ["bart", "gpt", "t5"])
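
Reviewer note (not part of the patch): a minimal sketch of the single-file branch that the new load_dataset_from_local() takes. The file path, split, and printed output are assumptions for illustration; the suffix-to-builder mapping and the datasets.load_dataset() call mirror the ones added in the diff above.

    from pathlib import Path

    import datasets

    # Hypothetical local dataset file; .json/.jsonl/.csv/.parquet resolve the same way.
    local_path = Path("./data/1M-GPT4-Augmented.parquet")

    # Same suffix -> builder mapping as format_map in load_dataset_from_local().
    format_map = {".json": "json", ".jsonl": "json", ".csv": "csv", ".parquet": "parquet"}
    dataset_type = format_map.get(local_path.suffix)
    assert dataset_type is not None, f"Unsupported file extension: {local_path.suffix}"

    # Same call the new code path issues for a single file; it needs no Hub access,
    # so it keeps working when the test sets HF_DATASETS_OFFLINE=1.
    dataset = datasets.load_dataset(dataset_type,
                                    data_files=str(local_path),
                                    split="train")
    print(next(iter(dataset)))

For a directory (for example the cnn_dailymail copy under the model cache), the directory branch instead forwards *dataset_config.query to datasets.load_dataset(), so a locally cached dataset loads the same way without network access.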