### :title OpenAI Completion Client with JSON Schema
# This example requires `guided_decoding_backend` to be set to
# `xgrammar` or `llguidance` in the config.yaml file.
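# For reference, a minimal config.yaml could look like the sketch below
# (the exact launch flags may vary between TensorRT-LLM versions):
#
#     guided_decoding_backend: xgrammar
#
# with the server started along the lines of:
#
#     trtllm-serve TinyLlama/TinyLlama-1.1B-Chat-v1.0 --extra_llm_api_options config.yaml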
import json

from openai import OpenAI

# The API key is typically not validated by the local server, but the OpenAI
# client requires a non-empty value.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="tensorrt_llm",
)

response = client.chat.completions.create(
    model="TinyLlama-1.1B-Chat-v1.0",
    messages=[{
        "role": "system",
        "content": "you are a helpful assistant"
    }, {
        "role": "user",
        "content": "Give me the information of the biggest city of China in the JSON format.",
    }],
    temperature=0,
    response_format={
        "type": "json",
        "schema": {
            "type": "object",
            "properties": {
                "name": {
                    "type": "string"
                },
                "population": {
                    "type": "integer"
                },
            },
            "required": ["name", "population"],
        },
    },
    # `chat_template_kwargs` is a request-level option rather than part of the
    # JSON schema, so it is passed via the client's `extra_body`.
    extra_body={"chat_template_kwargs": {
        "enable_thinking": False
    }},
)
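
# With guided decoding enabled, the generated content is constrained to the
# schema above, so it should parse as JSON containing both required keys,
# e.g. something like {"name": "Shanghai", "population": 24870895} (the
# exact values depend on the model).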
content = response.choices[0].message.content
try:
    response_json = json.loads(content)
    assert "name" in response_json and "population" in response_json
    print(content)
except json.JSONDecodeError:
    print("Failed to decode JSON response")
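
# To try this end to end, save this file under any name (here a hypothetical
# json_schema_client.py), start the server as sketched above, and run:
#
#     python json_schema_client.py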