From a5540acfce32e1a9826f19ad6151ad4d9c8a243d Mon Sep 17 00:00:00 2001
From: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
Date: Wed, 30 Jul 2025 16:33:08 +0800
Subject: [PATCH] chore: add trtllm-serve json schema example into doc. (#6418)

Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
---
 .../openai_completion_client_json_schema.py   | 42 +++++++++++++++++++
 .../llmapi/apps/_test_openai_chat_json.py     |  5 ---
 .../llmapi/apps/_test_trtllm_serve_example.py | 13 +++---
 3 files changed, 49 insertions(+), 11 deletions(-)
 create mode 100644 examples/serve/openai_completion_client_json_schema.py

diff --git a/examples/serve/openai_completion_client_json_schema.py b/examples/serve/openai_completion_client_json_schema.py
new file mode 100644
index 0000000000..2f110270f5
--- /dev/null
+++ b/examples/serve/openai_completion_client_json_schema.py
@@ -0,0 +1,42 @@
+### :title OpenAI Completion Client with JSON Schema
+
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://localhost:8000/v1",
+    api_key="tensorrt_llm",
+)
+
+response = client.chat.completions.create(
+    model="TinyLlama-1.1B-Chat-v1.0",
+    messages=[{
+        "role": "system",
+        "content": "you are a helpful assistant"
+    }, {
+        "role":
+        "user",
+        "content":
+        "Give me the information of the biggest city of China in the JSON format.",
+    }],
+    max_tokens=100,
+    temperature=0,
+    response_format={
+        "type": "json",
+        "schema": {
+            "type": "object",
+            "properties": {
+                "name": {
+                    "type": "string"
+                },
+                "population": {
+                    "type": "integer"
+                },
+            },
+            "required": ["name", "population"],
+            "chat_template_kwargs": {
+                "enable_thinking": False
+            }
+        }
+    },
+)
+print(response.choices[0].message.content)
diff --git a/tests/unittest/llmapi/apps/_test_openai_chat_json.py b/tests/unittest/llmapi/apps/_test_openai_chat_json.py
index 5518afdba7..2462188723 100644
--- a/tests/unittest/llmapi/apps/_test_openai_chat_json.py
+++ b/tests/unittest/llmapi/apps/_test_openai_chat_json.py
@@ -57,11 +57,6 @@ def client(server: RemoteOpenAIServer):
     return server.get_client()
 
 
-@pytest.fixture(scope="module")
-def async_client(server: RemoteOpenAIServer):
-    return server.get_async_client()
-
-
 @pytest.fixture(scope="module")
 def user_profile_schema():
     """Provides a sample JSON schema for a user profile."""
diff --git a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
index ca83c2e605..262eafa820 100644
--- a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
+++ b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
@@ -29,12 +29,13 @@ def example_root():
     return os.path.join(llm_root, "examples", "serve")
 
 
-@pytest.mark.parametrize("exe, script",
-                         [("python3", "openai_chat_client.py"),
-                          ("python3", "openai_completion_client.py"),
-                          ("bash", "curl_chat_client.sh"),
-                          ("bash", "curl_completion_client.sh"),
-                          ("bash", "genai_perf_client.sh")])
+@pytest.mark.parametrize(
+    "exe, script", [("python3", "openai_chat_client.py"),
+                    ("python3", "openai_completion_client.py"),
+                    ("python3", "openai_completion_client_json_schema.py"),
+                    ("bash", "curl_chat_client.sh"),
+                    ("bash", "curl_completion_client.sh"),
+                    ("bash", "genai_perf_client.sh")])
 def test_trtllm_serve_examples(exe: str, script: str,
                                server: RemoteOpenAIServer, example_root: str):
     client_script = os.path.join(example_root, script)