Nest image_embeds payload under a "data" key in chat content parts

Signed-off-by: ixlmar <206748156+ixlmar@users.noreply.github.com>
2026-01-14 06:27:45 +08:00 · 2025-12-05 14:01:39 +00:00 · 2025-12-05 14:01:39 +00:00 · b2a328c706
commit b2a328c706
parent 045331d494
2 changed files with 13 additions and 3 deletions
--- a/tensorrt_llm/serve/chat_utils.py
+++ b/tensorrt_llm/serve/chat_utils.py
@ -34,9 +34,16 @@ class ChatCompletionContentPartVideoParam(TypedDict, total=False):
    type: Required[Literal["video_url"]]


+class ImageEmbedsData(TypedDict):
+    """Type definition for serialized image embeddings structure."""
+    data: Required[str]
+
+
 class ChatCompletionContentPartImageEmbedsParam(TypedDict, total=False):
    """Type definition for image embeddings passed in base64-encoded PyTorch tensor format."""
-    image_embeds: Required[str]
+    image_embeds: Required[
+        # NB: Besides "data", could support "url" and "ipc_handle" in the future.
+        ImageEmbedsData]
    type: Required[Literal["image_embeds"]]


@ -75,7 +82,8 @@ MM_PARSER_MAP: dict[str, Callable[[ChatCompletionContentPartParam], Union[
        "audio_url":
        lambda part: _AudioParser(part).get("audio_url", {}).get("url", None),
        "image_embeds":
-        lambda part: _ImageEmbedsParser(part).get("image_embeds", None),
+        lambda part: _ImageEmbedsParser(part).get("image_embeds", {}).get(
+            "data", None),
    }

 # Map from content part tags used to directly provide embeddings
--- a/tests/unittest/llmapi/apps/_test_openai_chat_multimodal.py
+++ b/tests/unittest/llmapi/apps/_test_openai_chat_multimodal.py
@ -197,7 +197,9 @@ def test_single_chat_session_image_embeds(
    assert image_content["type"] == "image_url"
    image_content.clear()
    image_content["type"] = "image_embeds"
-    image_content["image_embeds"] = b64encode(mm_embed_bytes).decode("ascii")
+    image_content["image_embeds"] = {
+        "data": b64encode(mm_embed_bytes).decode("ascii")
+    }

    # test single completion
    #