### OpenAI Chat Client
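The snippet below talks to a trtllm-serve OpenAI-compatible endpoint (assumed here to be running locally at `http://localhost:8000/v1` and serving `Qwen2-VL-7B-Instruct`) and performs single-image inference through the standard Chat Completions API.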
```python
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="tensorrt_llm",
)

# Single image inference
response = client.chat.completions.create(
    model="Qwen2-VL-7B-Instruct",
    messages=[{
        "role": "system",
        "content": "you are a helpful assistant"
    }, {
        "role": "user",
        "content": [{
            "type": "text",
            "text": "Describe the natural environment in the image."
        }, {
            "type": "image_url",
            "image_url": {
                "url": "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png"
            }
        }]
    }],
    max_tokens=64,
)
print(response)

# TODO
# multi-image inference
# video inference
```
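The multi-image and video cases are still marked TODO above. As a rough sketch only, multi-image inference could follow the same Chat Completions format by passing several `image_url` parts in one user message; whether this server accepts multiple images per request is an assumption here, and the image URLs below are placeholders.

```python
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="tensorrt_llm",
)

# Sketch of multi-image inference: two image_url parts in a single user turn.
# Assumption: the server accepts more than one image per request.
# The image URLs are placeholders, not real test assets.
response = client.chat.completions.create(
    model="Qwen2-VL-7B-Instruct",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Compare the scenes shown in these two images."},
            {"type": "image_url", "image_url": {"url": "https://example.com/first.png"}},
            {"type": "image_url", "image_url": {"url": "https://example.com/second.png"}},
        ],
    }],
    max_tokens=64,
)
print(response)
```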