diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index a8c9e409438..68179dcb68c 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -388,10 +388,10 @@ steps:
     - python3 basic/offline_inference/embed.py
     - python3 basic/offline_inference/score.py
     # Multi-modal models
-    - python3 offline_inference/audio_language.py --seed 0
-    - python3 offline_inference/vision_language.py --seed 0
-    - python3 offline_inference/vision_language_multi_image.py --seed 0
-    - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
+    - python3 generate/multimodal/audio_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
+    - python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
     # Pooling models
     - python3 pooling/embed/vision_embedding_offline.py --seed 0
     # Features demo
@@ -1647,10 +1647,10 @@ steps:
     - python3 basic/offline_inference/embed.py
     - python3 basic/offline_inference/score.py
     # Multi-modal models
-    - python3 offline_inference/audio_language.py --seed 0
-    - python3 offline_inference/vision_language.py --seed 0
-    - python3 offline_inference/vision_language_multi_image.py --seed 0
-    - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
+    - python3 generate/multimodal/audio_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
+    - python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
     # Pooling models
     - python3 pooling/embed/vision_embedding_offline.py --seed 0
     # Features demo
@@ -1951,8 +1951,8 @@ steps:
   - pytest -v -s tests/models/multimodal/processing/
   - pytest -v -s tests/models/multimodal/test_mapping.py
   - python3 examples/basic/offline_inference/chat.py
-  - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
-  - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
+  - python3 examples/generate/multimodal/vision_language_offline.py --model-type qwen2_5_vl
+  - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/generate/multimodal/audio_language_offline.py --model-type whisper
 
 #-------------------------------------------------------  mi300 · quantization  --------------------------------------------------------#
 
@@ -2930,10 +2930,10 @@ steps:
   - python3 basic/offline_inference/embed.py
   - python3 basic/offline_inference/score.py
   # Multi-modal models
-  - python3 offline_inference/audio_language.py --seed 0
-  - python3 offline_inference/vision_language.py --seed 0
-  - python3 offline_inference/vision_language_multi_image.py --seed 0
-  - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
+  - python3 generate/multimodal/audio_language_offline.py --seed 0
+  - python3 generate/multimodal/vision_language_offline.py --seed 0
+  - python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
+  - python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
   # Pooling models
   - python3 pooling/embed/vision_embedding_offline.py --seed 0
   # Features demo
diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml
index 0cf9ec43392..d0930be156d 100644
--- a/.buildkite/test_areas/misc.yaml
+++ b/.buildkite/test_areas/misc.yaml
@@ -113,10 +113,10 @@ steps:
     - python3 basic/offline_inference/embed.py
     - python3 basic/offline_inference/score.py
     # for multi-modal models
-    - python3 offline_inference/audio_language.py --seed 0
-    - python3 offline_inference/vision_language.py --seed 0
-    - python3 offline_inference/vision_language_multi_image.py --seed 0
-    - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
+    - python3 generate/multimodal/audio_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
+    - python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
      # for pooling models
     - python3 pooling/embed/vision_embedding_offline.py --seed 0
     # for features demo
diff --git a/.buildkite/test_areas/model_runner_v2.yaml b/.buildkite/test_areas/model_runner_v2.yaml
index 7aa1870f0db..2b88c00d6b7 100644
--- a/.buildkite/test_areas/model_runner_v2.yaml
+++ b/.buildkite/test_areas/model_runner_v2.yaml
@@ -44,10 +44,10 @@ steps:
     #- python3 basic/offline_inference/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10  # TODO
     #- python3 basic/offline_inference/embed.py   # TODO
     # for multi-modal models
-    - python3 offline_inference/audio_language.py --seed 0
-    - python3 offline_inference/vision_language.py --seed 0
-    - python3 offline_inference/vision_language_multi_image.py --seed 0
-    - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
+    - python3 generate/multimodal/audio_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
+    - python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
     # for pooling models
     - python3 pooling/embed/vision_embedding_offline.py --seed 0
     # for features demo
diff --git a/.buildkite/test_areas/models_basic.yaml b/.buildkite/test_areas/models_basic.yaml
index ed782c061fa..73cf8c53bc9 100644
--- a/.buildkite/test_areas/models_basic.yaml
+++ b/.buildkite/test_areas/models_basic.yaml
@@ -69,9 +69,9 @@ steps:
     - pytest -v -s tests/models/multimodal/processing/
     - pytest -v -s tests/models/multimodal/test_mapping.py
     - python3 examples/basic/offline_inference/chat.py
-    - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
+    - python3 examples/generate/multimodal/vision_language_offline.py --model-type qwen2_5_vl
     # Whisper needs spawn method to avoid deadlock
-    - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
+    - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/generate/multimodal/audio_language_offline.py --model-type whisper
 
 - label: Transformers Backward Compatibility Models Test
   working_dir: "/vllm-workspace/"
@@ -83,7 +83,7 @@ steps:
     - pytest -v -s tests/models/test_transformers.py
     - pytest -v -s tests/models/multimodal/processing/
     - pytest -v -s tests/models/multimodal/test_mapping.py
-    - python3 examples/offline_inference/basic/chat.py
-    - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
+    - python3 examples/basic/offline_inference/chat.py
+    - python3 examples/generate/multimodal/vision_language_offline.py --model-type qwen2_5_vl
     # Whisper needs spawn method to avoid deadlock
-    - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
+    - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/generate/multimodal/audio_language_offline.py --model-type whisper
diff --git a/.github/mergify.yml b/.github/mergify.yml
index b96d6b81ac0..8ca00d6e7d2 100644
--- a/.github/mergify.yml
+++ b/.github/mergify.yml
@@ -389,11 +389,7 @@ pull_request_rules:
       - files~=^tests/entrypoints/anthropic/.*tool.*
       - files~=^vllm/tool_parsers/
       - files=docs/features/tool_calling.md
-      - files~=^examples/tool_chat_*
-      - files=examples/offline_inference/chat_with_tools.py
-      - files=examples/online_serving/openai_chat_completion_client_with_tools_required.py
-      - files=examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py
-      - files=examples/online_serving/openai_chat_completion_client_with_tools.py
+      - files~=^examples/tool_calling/
   actions:
     label:
       add:
diff --git a/docs/features/multimodal_inputs.md b/docs/features/multimodal_inputs.md
index df7aef3f14f..33796e20e76 100644
--- a/docs/features/multimodal_inputs.md
+++ b/docs/features/multimodal_inputs.md
@@ -68,7 +68,7 @@ You can pass a single image to the `'image'` field of the multi-modal dictionary
         print(generated_text)
     ```
 
-Full example: [examples/offline_inference/vision_language.py](../../examples/offline_inference/vision_language.py)
+Full example: [examples/generate/multimodal/vision_language_offline.py](../../examples/generate/multimodal/vision_language_offline.py)
 
 To substitute multiple images inside the same text prompt, you can pass in a list of images instead:
 
@@ -101,7 +101,7 @@ To substitute multiple images inside the same text prompt, you can pass in a lis
         print(generated_text)
     ```
 
-Full example: [examples/offline_inference/vision_language_multi_image.py](../../examples/offline_inference/vision_language_multi_image.py)
+Full example: [examples/generate/multimodal/vision_language_multi_image_offline.py](../../examples/generate/multimodal/vision_language_multi_image_offline.py)
 
 If using the [LLM.chat](../models/generative_models.md#llmchat) method, you can pass images directly in the message content using various formats: image URLs, PIL Image objects, or pre-computed embeddings:
 
@@ -287,13 +287,13 @@ Instead of NumPy arrays, you can also pass `'torch.Tensor'` instances, as shown
     !!! note
         'process_vision_info' is only applicable to Qwen2.5-VL and similar models.
 
-Full example: [examples/offline_inference/vision_language.py](../../examples/offline_inference/vision_language.py)
+Full example: [examples/generate/multimodal/vision_language_offline.py](../../examples/generate/multimodal/vision_language_offline.py)
 
 ### Audio Inputs
 
 You can pass a tuple `(array, sampling_rate)` to the `'audio'` field of the multi-modal dictionary.
 
-Full example: [examples/offline_inference/audio_language.py](../../examples/offline_inference/audio_language.py)
+Full example: [examples/generate/multimodal/audio_language_offline.py](../../examples/generate/multimodal/audio_language_offline.py)
 
 #### Chunking Long Audio for Transcription
 
@@ -674,7 +674,7 @@ Then, you can use the OpenAI client as follows:
     print("Chat completion output:", chat_response.choices[0].message.content)
     ```
 
-Full example: [examples/online_serving/openai_chat_completion_client_for_multimodal.py](../../examples/online_serving/openai_chat_completion_client_for_multimodal.py)
+Full example: [examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py](../../examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py)
 
 !!! tip
     Loading from local file paths is also supported on vLLM: You can specify the allowed local media path via `--allowed-local-media-path` when launching the API server/engine,
@@ -745,7 +745,7 @@ Then, you can use the OpenAI client as follows:
     print("Chat completion output from image url:", result)
     ```
 
-Full example: [examples/online_serving/openai_chat_completion_client_for_multimodal.py](../../examples/online_serving/openai_chat_completion_client_for_multimodal.py)
+Full example: [examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py](../../examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py)
 
 !!! note
     By default, the timeout for fetching videos through HTTP URL is `30` seconds.
@@ -958,7 +958,7 @@ Alternatively, you can pass `audio_url`, which is the audio counterpart of `imag
     print("Chat completion output from audio url:", result)
     ```
 
-Full example: [examples/online_serving/openai_chat_completion_client_for_multimodal.py](../../examples/online_serving/openai_chat_completion_client_for_multimodal.py)
+Full example: [examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py](../../examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py)
 
 !!! note
     By default, the timeout for fetching audios through HTTP URL is `10` seconds.
diff --git a/docs/features/reasoning_outputs.md b/docs/features/reasoning_outputs.md
index c7b2d688f22..ef3b3ad6ec0 100644
--- a/docs/features/reasoning_outputs.md
+++ b/docs/features/reasoning_outputs.md
@@ -202,7 +202,7 @@ The reasoning content is also available when both tool calling and the reasoning
     print(f"Arguments: {tool_call.arguments}")
     ```
 
-For more examples, please refer to [examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py](../../examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py).
+For more examples, please refer to [examples/reasoning/openai_chat_completion_tool_calls_with_reasoning.py](../../examples/reasoning/openai_chat_completion_tool_calls_with_reasoning.py).
 
 ## Server-Level Default Chat Template Kwargs
 
diff --git a/docs/serving/openai_compatible_server.md b/docs/serving/openai_compatible_server.md
index a2c90e3abd4..59f02a00656 100644
--- a/docs/serving/openai_compatible_server.md
+++ b/docs/serving/openai_compatible_server.md
@@ -251,7 +251,7 @@ The following extra parameters are supported:
 Our Responses API is compatible with [OpenAI's Responses API](https://platform.openai.com/docs/api-reference/responses);
 you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
 
-Code example: [examples/online_serving/openai_responses_client_with_tools.py](../../examples/online_serving/openai_responses_client_with_tools.py)
+Code example: [examples/tool_calling/openai_responses_client_with_tools.py](../../examples/tool_calling/openai_responses_client_with_tools.py)
 
 #### Extra parameters
 
@@ -279,7 +279,7 @@ you can use the [official OpenAI Python client](https://github.com/openai/openai
 !!! note
     To use the Transcriptions API, please install with extra audio dependencies using `pip install vllm[audio]`.
 
-Code example: [examples/online_serving/openai_transcription_client.py](../../examples/online_serving/openai_transcription_client.py)
+Code example: [examples/speech_to_text/openai/openai_transcription_client.py](../../examples/speech_to_text/openai/openai_transcription_client.py)
 
 NOTE: beam search is currently supported in the transcriptions endpoint for encoder-decoder multimodal models, e.g., whisper, but highly inefficient as work for handling the encoder/decoder cache is actively ongoing. This is an active point of ongoing optimization and will be handled properly in the very near future.
 
@@ -397,7 +397,7 @@ Please mind that the popular `openai/whisper-large-v3-turbo` model does not supp
 !!! note
     To use the Translation API, please install with extra audio dependencies using `pip install vllm[audio]`.
 
-Code example: [examples/online_serving/openai_translation_client.py](../../examples/online_serving/openai_translation_client.py)
+Code example: [examples/speech_to_text/openai/openai_translation_client.py](../../examples/speech_to_text/openai/openai_translation_client.py)
 
 #### Extra Parameters
 
diff --git a/examples/online_serving/batched_chat_completions.py b/examples/generate/batched_chat_completions_online.py
similarity index 100%
rename from examples/online_serving/batched_chat_completions.py
rename to examples/generate/batched_chat_completions_online.py
diff --git a/examples/offline_inference/audio_language.py b/examples/generate/multimodal/audio_language_offline.py
old mode 100755
new mode 100644
similarity index 100%
rename from examples/offline_inference/audio_language.py
rename to examples/generate/multimodal/audio_language_offline.py
diff --git a/examples/offline_inference/encoder_decoder_multimodal.py b/examples/generate/multimodal/encoder_decoder_multimodal_offline.py
similarity index 100%
rename from examples/offline_inference/encoder_decoder_multimodal.py
rename to examples/generate/multimodal/encoder_decoder_multimodal_offline.py
diff --git a/examples/offline_inference/mistral-small.py b/examples/generate/multimodal/mistral-small_offline.py
similarity index 100%
rename from examples/offline_inference/mistral-small.py
rename to examples/generate/multimodal/mistral-small_offline.py
diff --git a/examples/online_serving/openai_chat_completion_client_for_multimodal.py b/examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_client_for_multimodal.py
rename to examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py
diff --git a/examples/offline_inference/qwen2_5_omni/README.md b/examples/generate/multimodal/qwen2_5_omni/README.md
similarity index 63%
rename from examples/offline_inference/qwen2_5_omni/README.md
rename to examples/generate/multimodal/qwen2_5_omni/README.md
index 409ac0223b5..bd96b080f67 100644
--- a/examples/offline_inference/qwen2_5_omni/README.md
+++ b/examples/generate/multimodal/qwen2_5_omni/README.md
@@ -6,15 +6,15 @@ This folder provides several example scripts on how to inference Qwen2.5-Omni of
 
 ```bash
 # Audio + image + video
-python examples/offline_inference/qwen2_5_omni/only_thinker.py \
+python examples/generate/multimodal/qwen2_5_omni/only_thinker.py \
     -q mixed_modalities
 
 # Read vision and audio inputs from a single video file
-python examples/offline_inference/qwen2_5_omni/only_thinker.py \
+python examples/generate/multimodal/qwen2_5_omni/only_thinker.py \
     -q use_audio_in_video
 
 # Multiple audios
-python examples/offline_inference/qwen2_5_omni/only_thinker.py \
+python examples/generate/multimodal/qwen2_5_omni/only_thinker.py \
     -q multi_audios
 ```
 
@@ -24,16 +24,16 @@ You can also test Qwen2.5-Omni on a single modality:
 
 ```bash
 # Process audio inputs
-python examples/offline_inference/audio_language.py \
+python examples/generate/multimodal/audio_language_offline.py \
     --model-type qwen2_5_omni
 
 # Process image inputs
-python examples/offline_inference/vision_language.py \
+python examples/generate/multimodal/vision_language_offline.py \
     --modality image \
     --model-type qwen2_5_omni
 
 # Process video inputs
-python examples/offline_inference/vision_language.py \
+python examples/generate/multimodal/vision_language_offline.py \
     --modality video \
     --model-type qwen2_5_omni
 ```
diff --git a/examples/offline_inference/qwen2_5_omni/only_thinker.py b/examples/generate/multimodal/qwen2_5_omni/only_thinker.py
similarity index 100%
rename from examples/offline_inference/qwen2_5_omni/only_thinker.py
rename to examples/generate/multimodal/qwen2_5_omni/only_thinker.py
diff --git a/examples/offline_inference/qwen3_omni/only_thinker.py b/examples/generate/multimodal/qwen3_omni/only_thinker.py
similarity index 100%
rename from examples/offline_inference/qwen3_omni/only_thinker.py
rename to examples/generate/multimodal/qwen3_omni/only_thinker.py
diff --git a/examples/offline_inference/vision_language_multi_image.py b/examples/generate/multimodal/vision_language_multi_image_offline.py
old mode 100755
new mode 100644
similarity index 100%
rename from examples/offline_inference/vision_language_multi_image.py
rename to examples/generate/multimodal/vision_language_multi_image_offline.py
diff --git a/examples/offline_inference/vision_language.py b/examples/generate/multimodal/vision_language_offline.py
old mode 100755
new mode 100644
similarity index 99%
rename from examples/offline_inference/vision_language.py
rename to examples/generate/multimodal/vision_language_offline.py
index cfeda8804a0..87d42c036ec
--- a/examples/offline_inference/vision_language.py
+++ b/examples/generate/multimodal/vision_language_offline.py
@@ -1402,7 +1402,7 @@ def run_mantis(questions: list[str], modality: str) -> ModelRequestData:
 # MiniCPM-V
 def run_minicpmv_base(questions: list[str], modality: str, model_name):
     assert modality in ["image", "video", "image+video"]
-    # If you want to use `MiniCPM-o-2_6` with audio inputs, check `audio_language.py` # noqa
+    # If you want to use `MiniCPM-o-2_6` with audio inputs, check `audio_language_offline.py` # noqa
 
     # 2.0
     # The official repo doesn't work yet, so we need to use a fork for now
diff --git a/examples/offline_inference/qwen_1m.py b/examples/generate/qwen_1m_offline.py
similarity index 100%
rename from examples/offline_inference/qwen_1m.py
rename to examples/generate/qwen_1m_offline.py
diff --git a/examples/online_serving/token_generation_client.py b/examples/generate/token_generation_client.py
similarity index 100%
rename from examples/online_serving/token_generation_client.py
rename to examples/generate/token_generation_client.py
diff --git a/examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py b/examples/reasoning/openai_chat_completion_tool_calls_with_reasoning.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py
rename to examples/reasoning/openai_chat_completion_tool_calls_with_reasoning.py
diff --git a/examples/online_serving/openai_chat_completion_with_reasoning.py b/examples/reasoning/openai_chat_completion_with_reasoning.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_with_reasoning.py
rename to examples/reasoning/openai_chat_completion_with_reasoning.py
diff --git a/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py b/examples/reasoning/openai_chat_completion_with_reasoning_streaming.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_with_reasoning_streaming.py
rename to examples/reasoning/openai_chat_completion_with_reasoning_streaming.py
diff --git a/examples/online_serving/openai_responses_client.py b/examples/reasoning/openai_responses_client.py
similarity index 100%
rename from examples/online_serving/openai_responses_client.py
rename to examples/reasoning/openai_responses_client.py
diff --git a/examples/online_serving/openai_lid_client.py b/examples/speech_to_text/lid/openai_lid_client.py
similarity index 100%
rename from examples/online_serving/openai_lid_client.py
rename to examples/speech_to_text/lid/openai_lid_client.py
diff --git a/examples/online_serving/openai_transcription_client.py b/examples/speech_to_text/openai/openai_transcription_client.py
similarity index 100%
rename from examples/online_serving/openai_transcription_client.py
rename to examples/speech_to_text/openai/openai_transcription_client.py
diff --git a/examples/online_serving/openai_translation_client.py b/examples/speech_to_text/openai/openai_translation_client.py
similarity index 100%
rename from examples/online_serving/openai_translation_client.py
rename to examples/speech_to_text/openai/openai_translation_client.py
diff --git a/examples/online_serving/openai_realtime_client.py b/examples/speech_to_text/realtime/openai_realtime_client.py
similarity index 100%
rename from examples/online_serving/openai_realtime_client.py
rename to examples/speech_to_text/realtime/openai_realtime_client.py
diff --git a/examples/online_serving/openai_realtime_microphone_client.py b/examples/speech_to_text/realtime/openai_realtime_microphone_client.py
similarity index 100%
rename from examples/online_serving/openai_realtime_microphone_client.py
rename to examples/speech_to_text/realtime/openai_realtime_microphone_client.py
diff --git a/examples/offline_inference/chat_with_tools.py b/examples/tool_calling/chat_with_tools_offline.py
similarity index 100%
rename from examples/offline_inference/chat_with_tools.py
rename to examples/tool_calling/chat_with_tools_offline.py
diff --git a/examples/online_serving/openai_chat_completion_client_with_tools.py b/examples/tool_calling/openai_chat_completion_client_with_tools.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_client_with_tools.py
rename to examples/tool_calling/openai_chat_completion_client_with_tools.py
diff --git a/examples/online_serving/openai_chat_completion_client_with_tools_required.py b/examples/tool_calling/openai_chat_completion_client_with_tools_required.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_client_with_tools_required.py
rename to examples/tool_calling/openai_chat_completion_client_with_tools_required.py
diff --git a/examples/online_serving/openai_chat_completion_client_with_tools_xlam.py b/examples/tool_calling/openai_chat_completion_client_with_tools_xlam.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_client_with_tools_xlam.py
rename to examples/tool_calling/openai_chat_completion_client_with_tools_xlam.py
diff --git a/examples/online_serving/openai_chat_completion_client_with_tools_xlam_streaming.py b/examples/tool_calling/openai_chat_completion_client_with_tools_xlam_streaming.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_client_with_tools_xlam_streaming.py
rename to examples/tool_calling/openai_chat_completion_client_with_tools_xlam_streaming.py
diff --git a/examples/online_serving/openai_responses_client_with_mcp_tools.py b/examples/tool_calling/openai_responses_client_with_mcp_tools.py
similarity index 100%
rename from examples/online_serving/openai_responses_client_with_mcp_tools.py
rename to examples/tool_calling/openai_responses_client_with_mcp_tools.py
diff --git a/examples/online_serving/openai_responses_client_with_tools.py b/examples/tool_calling/openai_responses_client_with_tools.py
similarity index 100%
rename from examples/online_serving/openai_responses_client_with_tools.py
rename to examples/tool_calling/openai_responses_client_with_tools.py