From a5540acfce32e1a9826f19ad6151ad4d9c8a243d Mon Sep 17 00:00:00 2001
From: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
Date: Wed, 30 Jul 2025 16:33:08 +0800
Subject: [PATCH] chore: add trtllm-serve json schema example into doc. (#6418)

Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
---
 .../openai_completion_client_json_schema.py   | 42 +++++++++++++++++++
 .../llmapi/apps/_test_openai_chat_json.py     |  5 ---
 .../llmapi/apps/_test_trtllm_serve_example.py | 13 +++---
 3 files changed, 49 insertions(+), 11 deletions(-)
 create mode 100644 examples/serve/openai_completion_client_json_schema.py

diff --git a/examples/serve/openai_completion_client_json_schema.py b/examples/serve/openai_completion_client_json_schema.py
new file mode 100644
index 0000000000..2f110270f5
--- /dev/null
+++ b/examples/serve/openai_completion_client_json_schema.py
@@ -0,0 +1,42 @@
+### :title OpenAI Completion Client with JSON Schema
+
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://localhost:8000/v1",
+    api_key="tensorrt_llm",
+)
+
+response = client.chat.completions.create(
+    model="TinyLlama-1.1B-Chat-v1.0",
+    messages=[{
+        "role": "system",
+        "content": "you are a helpful assistant"
+    }, {
+        "role":
+        "user",
+        "content":
+        "Give me the information of the biggest city of China in the JSON format.",
+    }],
+    max_tokens=100,
+    temperature=0,
+    response_format={
+        "type": "json",
+        "schema": {
+            "type": "object",
+            "properties": {
+                "name": {
+                    "type": "string"
+                },
+                "population": {
+                    "type": "integer"
+                },
+            },
+            "required": ["name", "population"],
+            "chat_template_kwargs": {
+                "enable_thinking": False
+            }
+        }
+    },
+)
+print(response.choices[0].message.content)
diff --git a/tests/unittest/llmapi/apps/_test_openai_chat_json.py b/tests/unittest/llmapi/apps/_test_openai_chat_json.py
index 5518afdba7..2462188723 100644
--- a/tests/unittest/llmapi/apps/_test_openai_chat_json.py
+++ b/tests/unittest/llmapi/apps/_test_openai_chat_json.py
@@ -57,11 +57,6 @@ def client(server: RemoteOpenAIServer):
     return server.get_client()
 
 
-@pytest.fixture(scope="module")
-def async_client(server: RemoteOpenAIServer):
-    return server.get_async_client()
-
-
 @pytest.fixture(scope="module")
 def user_profile_schema():
     """Provides a sample JSON schema for a user profile."""
diff --git a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
index ca83c2e605..262eafa820 100644
--- a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
+++ b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
@@ -29,12 +29,13 @@ def example_root():
     return os.path.join(llm_root, "examples", "serve")
 
 
-@pytest.mark.parametrize("exe, script",
-                         [("python3", "openai_chat_client.py"),
-                          ("python3", "openai_completion_client.py"),
-                          ("bash", "curl_chat_client.sh"),
-                          ("bash", "curl_completion_client.sh"),
-                          ("bash", "genai_perf_client.sh")])
+@pytest.mark.parametrize(
+    "exe, script", [("python3", "openai_chat_client.py"),
+                    ("python3", "openai_completion_client.py"),
+                    ("python3", "openai_completion_client_json_schema.py"),
+                    ("bash", "curl_chat_client.sh"),
+                    ("bash", "curl_completion_client.sh"),
+                    ("bash", "genai_perf_client.sh")])
 def test_trtllm_serve_examples(exe: str, script: str,
                                server: RemoteOpenAIServer, example_root: str):
     client_script = os.path.join(example_root, script)