mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Examples] Resettle generate examples. (#36464)
Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
This commit is contained in:
+14
-14
@@ -388,10 +388,10 @@ steps:
|
||||
- python3 basic/offline_inference/embed.py
|
||||
- python3 basic/offline_inference/score.py
|
||||
# Multi-modal models
|
||||
- python3 offline_inference/audio_language.py --seed 0
|
||||
- python3 offline_inference/vision_language.py --seed 0
|
||||
- python3 offline_inference/vision_language_multi_image.py --seed 0
|
||||
- python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
|
||||
- python3 generate/multimodal/audio_language_offline.py --seed 0
|
||||
- python3 generate/multimodal/vision_language_offline.py --seed 0
|
||||
- python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
|
||||
- python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
|
||||
# Pooling models
|
||||
- python3 pooling/embed/vision_embedding_offline.py --seed 0
|
||||
# Features demo
|
||||
@@ -1647,10 +1647,10 @@ steps:
|
||||
- python3 basic/offline_inference/embed.py
|
||||
- python3 basic/offline_inference/score.py
|
||||
# Multi-modal models
|
||||
- python3 offline_inference/audio_language.py --seed 0
|
||||
- python3 offline_inference/vision_language.py --seed 0
|
||||
- python3 offline_inference/vision_language_multi_image.py --seed 0
|
||||
- python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
|
||||
- python3 generate/multimodal/audio_language_offline.py --seed 0
|
||||
- python3 generate/multimodal/vision_language_offline.py --seed 0
|
||||
- python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
|
||||
- python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
|
||||
# Pooling models
|
||||
- python3 pooling/embed/vision_embedding_offline.py --seed 0
|
||||
# Features demo
|
||||
@@ -1951,8 +1951,8 @@ steps:
|
||||
- pytest -v -s tests/models/multimodal/processing/
|
||||
- pytest -v -s tests/models/multimodal/test_mapping.py
|
||||
- python3 examples/basic/offline_inference/chat.py
|
||||
- python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
|
||||
- VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
|
||||
- python3 examples/generate/multimodal/vision_language_offline.py --model-type qwen2_5_vl
|
||||
- VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/generate/multimodal/audio_language_offline.py --model-type whisper
|
||||
|
||||
#------------------------------------------------------- mi300 · quantization --------------------------------------------------------#
|
||||
|
||||
@@ -2930,10 +2930,10 @@ steps:
|
||||
- python3 basic/offline_inference/embed.py
|
||||
- python3 basic/offline_inference/score.py
|
||||
# Multi-modal models
|
||||
- python3 offline_inference/audio_language.py --seed 0
|
||||
- python3 offline_inference/vision_language.py --seed 0
|
||||
- python3 offline_inference/vision_language_multi_image.py --seed 0
|
||||
- python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
|
||||
- python3 generate/multimodal/audio_language_offline.py --seed 0
|
||||
- python3 generate/multimodal/vision_language_offline.py --seed 0
|
||||
- python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
|
||||
- python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
|
||||
# Pooling models
|
||||
- python3 pooling/embed/vision_embedding_offline.py --seed 0
|
||||
# Features demo
|
||||
|
||||
@@ -113,10 +113,10 @@ steps:
|
||||
- python3 basic/offline_inference/embed.py
|
||||
- python3 basic/offline_inference/score.py
|
||||
# for multi-modal models
|
||||
- python3 offline_inference/audio_language.py --seed 0
|
||||
- python3 offline_inference/vision_language.py --seed 0
|
||||
- python3 offline_inference/vision_language_multi_image.py --seed 0
|
||||
- python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
|
||||
- python3 generate/multimodal/audio_language_offline.py --seed 0
|
||||
- python3 generate/multimodal/vision_language_offline.py --seed 0
|
||||
- python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
|
||||
- python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
|
||||
# for pooling models
|
||||
- python3 pooling/embed/vision_embedding_offline.py --seed 0
|
||||
# for features demo
|
||||
|
||||
@@ -44,10 +44,10 @@ steps:
|
||||
#- python3 basic/offline_inference/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10 # TODO
|
||||
#- python3 basic/offline_inference/embed.py # TODO
|
||||
# for multi-modal models
|
||||
- python3 offline_inference/audio_language.py --seed 0
|
||||
- python3 offline_inference/vision_language.py --seed 0
|
||||
- python3 offline_inference/vision_language_multi_image.py --seed 0
|
||||
- python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
|
||||
- python3 generate/multimodal/audio_language_offline.py --seed 0
|
||||
- python3 generate/multimodal/vision_language_offline.py --seed 0
|
||||
- python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
|
||||
- python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
|
||||
# for pooling models
|
||||
- python3 pooling/embed/vision_embedding_offline.py --seed 0
|
||||
# for features demo
|
||||
|
||||
@@ -69,9 +69,9 @@ steps:
|
||||
- pytest -v -s tests/models/multimodal/processing/
|
||||
- pytest -v -s tests/models/multimodal/test_mapping.py
|
||||
- python3 examples/basic/offline_inference/chat.py
|
||||
- python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
|
||||
- python3 examples/generate/multimodal/vision_language_offline.py --model-type qwen2_5_vl
|
||||
# Whisper needs spawn method to avoid deadlock
|
||||
- VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
|
||||
- VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/generate/multimodal/audio_language_offline.py --model-type whisper
|
||||
|
||||
- label: Transformers Backward Compatibility Models Test
|
||||
working_dir: "/vllm-workspace/"
|
||||
@@ -83,7 +83,7 @@ steps:
|
||||
- pytest -v -s tests/models/test_transformers.py
|
||||
- pytest -v -s tests/models/multimodal/processing/
|
||||
- pytest -v -s tests/models/multimodal/test_mapping.py
|
||||
- python3 examples/offline_inference/basic/chat.py
|
||||
- python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
|
||||
- python3 examples/basic/offline_inference/chat.py
|
||||
- python3 examples/generate/multimodal/vision_language_offline.py --model-type qwen2_5_vl
|
||||
# Whisper needs spawn method to avoid deadlock
|
||||
- VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
|
||||
- VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/generate/multimodal/audio_language_offline.py --model-type whisper
|
||||
|
||||
+1
-5
@@ -389,11 +389,7 @@ pull_request_rules:
|
||||
- files~=^tests/entrypoints/anthropic/.*tool.*
|
||||
- files~=^vllm/tool_parsers/
|
||||
- files=docs/features/tool_calling.md
|
||||
- files~=^examples/tool_chat_*
|
||||
- files=examples/offline_inference/chat_with_tools.py
|
||||
- files=examples/online_serving/openai_chat_completion_client_with_tools_required.py
|
||||
- files=examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py
|
||||
- files=examples/online_serving/openai_chat_completion_client_with_tools.py
|
||||
- files~=^examples/tool_calling/
|
||||
actions:
|
||||
label:
|
||||
add:
|
||||
|
||||
@@ -68,7 +68,7 @@ You can pass a single image to the `'image'` field of the multi-modal dictionary
|
||||
print(generated_text)
|
||||
```
|
||||
|
||||
Full example: [examples/offline_inference/vision_language.py](../../examples/offline_inference/vision_language.py)
|
||||
Full example: [examples/generate/multimodal/vision_language_offline.py](../../examples/generate/multimodal/vision_language_offline.py)
|
||||
|
||||
To substitute multiple images inside the same text prompt, you can pass in a list of images instead:
|
||||
|
||||
@@ -101,7 +101,7 @@ To substitute multiple images inside the same text prompt, you can pass in a lis
|
||||
print(generated_text)
|
||||
```
|
||||
|
||||
Full example: [examples/offline_inference/vision_language_multi_image.py](../../examples/offline_inference/vision_language_multi_image.py)
|
||||
Full example: [examples/generate/multimodal/vision_language_multi_image_offline.py](../../examples/generate/multimodal/vision_language_multi_image_offline.py)
|
||||
|
||||
If using the [LLM.chat](../models/generative_models.md#llmchat) method, you can pass images directly in the message content using various formats: image URLs, PIL Image objects, or pre-computed embeddings:
|
||||
|
||||
@@ -287,13 +287,13 @@ Instead of NumPy arrays, you can also pass `'torch.Tensor'` instances, as shown
|
||||
!!! note
|
||||
'process_vision_info' is only applicable to Qwen2.5-VL and similar models.
|
||||
|
||||
Full example: [examples/offline_inference/vision_language.py](../../examples/offline_inference/vision_language.py)
|
||||
Full example: [examples/generate/multimodal/vision_language_offline.py](../../examples/generate/multimodal/vision_language_offline.py)
|
||||
|
||||
### Audio Inputs
|
||||
|
||||
You can pass a tuple `(array, sampling_rate)` to the `'audio'` field of the multi-modal dictionary.
|
||||
|
||||
Full example: [examples/offline_inference/audio_language.py](../../examples/offline_inference/audio_language.py)
|
||||
Full example: [examples/generate/multimodal/audio_language_offline.py](../../examples/generate/multimodal/audio_language_offline.py)
|
||||
|
||||
#### Chunking Long Audio for Transcription
|
||||
|
||||
@@ -674,7 +674,7 @@ Then, you can use the OpenAI client as follows:
|
||||
print("Chat completion output:", chat_response.choices[0].message.content)
|
||||
```
|
||||
|
||||
Full example: [examples/online_serving/openai_chat_completion_client_for_multimodal.py](../../examples/online_serving/openai_chat_completion_client_for_multimodal.py)
|
||||
Full example: [examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py](../../examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py)
|
||||
|
||||
!!! tip
|
||||
Loading from local file paths is also supported on vLLM: You can specify the allowed local media path via `--allowed-local-media-path` when launching the API server/engine,
|
||||
@@ -745,7 +745,7 @@ Then, you can use the OpenAI client as follows:
|
||||
print("Chat completion output from image url:", result)
|
||||
```
|
||||
|
||||
Full example: [examples/online_serving/openai_chat_completion_client_for_multimodal.py](../../examples/online_serving/openai_chat_completion_client_for_multimodal.py)
|
||||
Full example: [examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py](../../examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py)
|
||||
|
||||
!!! note
|
||||
By default, the timeout for fetching videos through HTTP URL is `30` seconds.
|
||||
@@ -958,7 +958,7 @@ Alternatively, you can pass `audio_url`, which is the audio counterpart of `imag
|
||||
print("Chat completion output from audio url:", result)
|
||||
```
|
||||
|
||||
Full example: [examples/online_serving/openai_chat_completion_client_for_multimodal.py](../../examples/online_serving/openai_chat_completion_client_for_multimodal.py)
|
||||
Full example: [examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py](../../examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py)
|
||||
|
||||
!!! note
|
||||
By default, the timeout for fetching audios through HTTP URL is `10` seconds.
|
||||
|
||||
@@ -202,7 +202,7 @@ The reasoning content is also available when both tool calling and the reasoning
|
||||
print(f"Arguments: {tool_call.arguments}")
|
||||
```
|
||||
|
||||
For more examples, please refer to [examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py](../../examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py).
|
||||
For more examples, please refer to [examples/reasoning/openai_chat_completion_tool_calls_with_reasoning.py](../../examples/reasoning/openai_chat_completion_tool_calls_with_reasoning.py).
|
||||
|
||||
## Server-Level Default Chat Template Kwargs
|
||||
|
||||
|
||||
@@ -251,7 +251,7 @@ The following extra parameters are supported:
|
||||
Our Responses API is compatible with [OpenAI's Responses API](https://platform.openai.com/docs/api-reference/responses);
|
||||
you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
|
||||
|
||||
Code example: [examples/online_serving/openai_responses_client_with_tools.py](../../examples/online_serving/openai_responses_client_with_tools.py)
|
||||
Code example: [examples/online_serving/openai_responses_client_with_tools.py](../../examples/tool_calling/openai_responses_client_with_tools.py)
|
||||
|
||||
#### Extra parameters
|
||||
|
||||
@@ -279,7 +279,7 @@ you can use the [official OpenAI Python client](https://github.com/openai/openai
|
||||
!!! note
|
||||
To use the Transcriptions API, please install with extra audio dependencies using `pip install vllm[audio]`.
|
||||
|
||||
Code example: [examples/online_serving/openai_transcription_client.py](../../examples/online_serving/openai_transcription_client.py)
|
||||
Code example: [examples/speech_to_text/openai/openai_transcription_client.py](../../examples/speech_to_text/openai/openai_transcription_client.py)
|
||||
|
||||
NOTE: beam search is currently supported in the transcriptions endpoint for encoder-decoder multimodal models, e.g., whisper, but highly inefficient as work for handling the encoder/decoder cache is actively ongoing. This is an active point of ongoing optimization and will be handled properly in the very near future.
|
||||
|
||||
@@ -397,7 +397,7 @@ Please mind that the popular `openai/whisper-large-v3-turbo` model does not supp
|
||||
!!! note
|
||||
To use the Translation API, please install with extra audio dependencies using `pip install vllm[audio]`.
|
||||
|
||||
Code example: [examples/online_serving/openai_translation_client.py](../../examples/online_serving/openai_translation_client.py)
|
||||
Code example: [examples/speech_to_text/openai/openai_translation_client.py](../../examples/speech_to_text/openai/openai_translation_client.py)
|
||||
|
||||
#### Extra Parameters
|
||||
|
||||
|
||||
Executable → Regular
+6
-6
@@ -6,15 +6,15 @@ This folder provides several example scripts on how to inference Qwen2.5-Omni of
|
||||
|
||||
```bash
|
||||
# Audio + image + video
|
||||
python examples/offline_inference/qwen2_5_omni/only_thinker.py \
|
||||
python examples/generate/multimodal/qwen2_5_omni/only_thinker.py \
|
||||
-q mixed_modalities
|
||||
|
||||
# Read vision and audio inputs from a single video file
|
||||
python examples/offline_inference/qwen2_5_omni/only_thinker.py \
|
||||
python examples/generate/multimodal/qwen2_5_omni/only_thinker.py \
|
||||
-q use_audio_in_video
|
||||
|
||||
# Multiple audios
|
||||
python examples/offline_inference/qwen2_5_omni/only_thinker.py \
|
||||
python examples/generate/multimodal/qwen2_5_omni/only_thinker.py \
|
||||
-q multi_audios
|
||||
```
|
||||
|
||||
@@ -24,16 +24,16 @@ You can also test Qwen2.5-Omni on a single modality:
|
||||
|
||||
```bash
|
||||
# Process audio inputs
|
||||
python examples/offline_inference/audio_language.py \
|
||||
python examples/generate/multimodal/audio_language_offline.py \
|
||||
--model-type qwen2_5_omni
|
||||
|
||||
# Process image inputs
|
||||
python examples/offline_inference/vision_language.py \
|
||||
python examples/generate/multimodal/vision_language_offline.py \
|
||||
--modality image \
|
||||
--model-type qwen2_5_omni
|
||||
|
||||
# Process video inputs
|
||||
python examples/offline_inference/vision_language.py \
|
||||
python examples/generate/multimodal/vision_language_offline.py \
|
||||
--modality video \
|
||||
--model-type qwen2_5_omni
|
||||
```
|
||||
Executable → Regular
Executable → Regular
+1
-1
@@ -1402,7 +1402,7 @@ def run_mantis(questions: list[str], modality: str) -> ModelRequestData:
|
||||
# MiniCPM-V
|
||||
def run_minicpmv_base(questions: list[str], modality: str, model_name):
|
||||
assert modality in ["image", "video", "image+video"]
|
||||
# If you want to use `MiniCPM-o-2_6` with audio inputs, check `audio_language.py` # noqa
|
||||
# If you want to use `MiniCPM-o-2_6` with audio inputs, check `audio_language_offline.py` # noqa
|
||||
|
||||
# 2.0
|
||||
# The official repo doesn't work yet, so we need to use a fork for now
|
||||
Reference in New Issue
Block a user