mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
chore: add trtllm-serve json schema example into doc. (#6418)
Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
This commit is contained in:
parent
2fe9cc0889
commit
a5540acfce
42
examples/serve/openai_completion_client_json_schema.py
Normal file
42
examples/serve/openai_completion_client_json_schema.py
Normal file
@ -0,0 +1,42 @@
|
||||
### :title OpenAI Completion Client with JSON Schema
"""Request schema-constrained JSON output from a ``trtllm-serve`` endpoint.

Sends a chat completion with a ``response_format`` of type ``"json"`` plus a
JSON schema, so the server constrains generation to valid JSON matching the
schema. Requires a running ``trtllm-serve`` instance at
``http://localhost:8000/v1``; prints the raw message content.
"""

from openai import OpenAI

# trtllm-serve does not authenticate; api_key is a required placeholder.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="tensorrt_llm",
)

response = client.chat.completions.create(
    model="TinyLlama-1.1B-Chat-v1.0",
    messages=[{
        "role": "system",
        "content": "you are a helpful assistant"
    }, {
        "role":
        "user",
        # Fixed: was an f-string with no placeholders (ruff F541);
        # the runtime string value is unchanged.
        "content":
        "Give me the information of the biggest city of China in the JSON format.",
    }],
    max_tokens=100,
    temperature=0,
    response_format={
        "type": "json",
        "schema": {
            "type": "object",
            "properties": {
                "name": {
                    "type": "string"
                },
                "population": {
                    "type": "integer"
                },
            },
            "required": ["name", "population"],
            # NOTE(review): "chat_template_kwargs" is not a JSON-Schema
            # keyword; it looks like a request-level option nested here by
            # mistake. Kept in place so the request payload matches the
            # original example — confirm intended placement with the server.
            "chat_template_kwargs": {
                "enable_thinking": False
            }
        }
    },
)
print(response.choices[0].message.content)
||||
@ -57,11 +57,6 @@ def client(server: RemoteOpenAIServer):
|
||||
return server.get_client()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def async_client(server: RemoteOpenAIServer):
    """Module-scoped asynchronous OpenAI client bound to the shared test server."""
    return server.get_async_client()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def user_profile_schema():
|
||||
"""Provides a sample JSON schema for a user profile."""
|
||||
|
||||
@ -29,12 +29,13 @@ def example_root():
|
||||
return os.path.join(llm_root, "examples", "serve")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("exe, script",
|
||||
[("python3", "openai_chat_client.py"),
|
||||
("python3", "openai_completion_client.py"),
|
||||
("bash", "curl_chat_client.sh"),
|
||||
("bash", "curl_completion_client.sh"),
|
||||
("bash", "genai_perf_client.sh")])
|
||||
@pytest.mark.parametrize(
|
||||
"exe, script", [("python3", "openai_chat_client.py"),
|
||||
("python3", "openai_completion_client.py"),
|
||||
("python3", "openai_completion_client_json_schema.py"),
|
||||
("bash", "curl_chat_client.sh"),
|
||||
("bash", "curl_completion_client.sh"),
|
||||
("bash", "genai_perf_client.sh")])
|
||||
def test_trtllm_serve_examples(exe: str, script: str,
|
||||
server: RemoteOpenAIServer, example_root: str):
|
||||
client_script = os.path.join(example_root, script)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user