diff --git a/tensorrt_llm/serve/chat_utils.py b/tensorrt_llm/serve/chat_utils.py
index 581615a201..3adee52b9e 100644
--- a/tensorrt_llm/serve/chat_utils.py
+++ b/tensorrt_llm/serve/chat_utils.py
@@ -34,9 +34,16 @@ class ChatCompletionContentPartVideoParam(TypedDict, total=False):
     type: Required[Literal["video_url"]]
 
 
+class ImageEmbedsData(TypedDict):
+    """Image embeddings payload; "data" is the base64-encoded tensor."""
+    data: Required[str]
+
+
 class ChatCompletionContentPartImageEmbedsParam(TypedDict, total=False):
     """Type definition for image embeddings passed in base64-encoded PyTorch tensor format."""
-    image_embeds: Required[str]
+    image_embeds: Required[
+        # NB: Only "data" is supported; "url" and "ipc_handle" may follow later.
+        ImageEmbedsData]
     type: Required[Literal["image_embeds"]]
 
 
@@ -75,7 +82,8 @@ MM_PARSER_MAP: dict[str, Callable[[ChatCompletionContentPartParam], Union[
         "audio_url":
         lambda part: _AudioParser(part).get("audio_url", {}).get("url", None),
         "image_embeds":
-        lambda part: _ImageEmbedsParser(part).get("image_embeds", None),
+        lambda part: _ImageEmbedsParser(part).get("image_embeds", {}).get(
+            "data", None),
     }
 
 # Map from content part tags used to directly provide embeddings
diff --git a/tests/unittest/llmapi/apps/_test_openai_chat_multimodal.py b/tests/unittest/llmapi/apps/_test_openai_chat_multimodal.py
index 5968c7681e..dd51407e0b 100644
--- a/tests/unittest/llmapi/apps/_test_openai_chat_multimodal.py
+++ b/tests/unittest/llmapi/apps/_test_openai_chat_multimodal.py
@@ -197,7 +197,9 @@ def test_single_chat_session_image_embeds(
     assert image_content["type"] == "image_url"
     image_content.clear()
     image_content["type"] = "image_embeds"
-    image_content["image_embeds"] = b64encode(mm_embed_bytes).decode("ascii")
+    image_content["image_embeds"] = {
+        "data": b64encode(mm_embed_bytes).decode("ascii")
+    }
 
     # test single completion
     #