up

2025-09-22 16:46:49 +05:30 · 2025-09-18 17:23:04 +05:30 · 2025-09-18 14:59:00 +05:30 · 2025-09-18 14:49:48 +05:30 · 2025-09-16 18:09:12 +05:30 · 2025-09-15 16:26:52 +05:30
633 changed files with 15049 additions and 5218 deletions
@@ -340,6 +340,9 @@ jobs:
          - backend: "optimum_quanto"
            test_location: "quanto"
            additional_deps: []
+          - backend: "nvidia_modelopt"
+            test_location: "modelopt"
+            additional_deps: []
    runs-on:
      group: aws-g6e-xlarge-plus
    container:
@@ -1,38 +0,0 @@
-name: Run Flax dependency tests
-
-on:
-  pull_request:
-    branches:
-      - main
-    paths:
-      - "src/diffusers/**.py"
-  push:
-    branches:
-      - main
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  check_flax_dependencies:
-    runs-on: ubuntu-22.04
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: "3.8"
-      - name: Install dependencies
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          python -m pip install --upgrade pip uv
-          python -m uv pip install -e .
-          python -m uv pip install "jax[cpu]>=0.2.16,!=0.3.2"
-          python -m uv pip install "flax>=0.4.1"
-          python -m uv pip install "jaxlib>=0.1.65"
-          python -m uv pip install pytest
-      - name: Check for soft dependencies
-        run: |
-          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
-          pytest tests/others/test_dependencies.py
@@ -37,7 +37,7 @@ limitations under the License.

 ## Installation

-We recommend installing 🤗 Diffusers in a virtual environment from PyPI or Conda. For more details about installing [PyTorch](https://pytorch.org/get-started/locally/) and [Flax](https://flax.readthedocs.io/en/latest/#installation), please refer to their official documentation.
+We recommend installing 🤗 Diffusers in a virtual environment from PyPI or Conda. For more details about installing [PyTorch](https://pytorch.org/get-started/locally/), please refer to their official documentation.

 ### PyTorch

@@ -53,14 +53,6 @@ With `conda` (maintained by the community):
 conda install -c conda-forge diffusers
 ```

-### Flax
-
-With `pip` (official package):
-
-```bash
-pip install --upgrade diffusers[flax]
-```
-
 ### Apple Silicon (M1/M2) support

 Please refer to the [How to use Stable Diffusion in Apple Silicon](https://huggingface.co/docs/diffusers/optimization/mps) guide.
@@ -1,49 +0,0 @@
-FROM ubuntu:20.04
-LABEL maintainer="Hugging Face"
-LABEL repository="diffusers"
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get -y update \
-    && apt-get install -y software-properties-common \
-    && add-apt-repository ppa:deadsnakes/ppa
-
-RUN apt install -y bash \
-        build-essential \
-        git \
-        git-lfs \
-        curl \
-        ca-certificates \
-        libsndfile1-dev \
-        libgl1 \
-        python3.10 \
-        python3-pip \
-        python3.10-venv && \
-    rm -rf /var/lib/apt/lists
-
-# make sure to use venv
-RUN python3.10 -m venv /opt/venv
-ENV PATH="/opt/venv/bin:$PATH"
-
-# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
-# follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container
-RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
-    python3 -m uv pip install --upgrade --no-cache-dir \
-        clu \
-        "jax[cpu]>=0.2.16,!=0.3.2" \
-        "flax>=0.4.1" \
-        "jaxlib>=0.1.65" && \
-    python3 -m uv pip install --no-cache-dir \
-        accelerate \
-        datasets \
-        hf-doc-builder \
-        huggingface-hub \
-        Jinja2 \
-        librosa \
-        numpy==1.26.4 \
-        scipy \
-        tensorboard \
-        transformers \
-        hf_transfer
-
-CMD ["/bin/bash"]
@@ -1,51 +0,0 @@
-FROM ubuntu:20.04
-LABEL maintainer="Hugging Face"
-LABEL repository="diffusers"
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get -y update \
-    && apt-get install -y software-properties-common \
-    && add-apt-repository ppa:deadsnakes/ppa
-
-RUN apt install -y bash \
-                   build-essential \
-                   git \
-                   git-lfs \
-                   curl \
-                   ca-certificates \
-                   libsndfile1-dev \
-                   libgl1 \
-                   python3.10 \
-                   python3-pip \
-                   python3.10-venv && \
-    rm -rf /var/lib/apt/lists
-
-# make sure to use venv
-RUN python3.10 -m venv /opt/venv
-ENV PATH="/opt/venv/bin:$PATH"
-
-# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
-# follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container
-RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
-    python3 -m pip install --no-cache-dir \
-        "jax[tpu]>=0.2.16,!=0.3.2" \
-        -f https://storage.googleapis.com/jax-releases/libtpu_releases.html && \
-    python3 -m uv pip install --upgrade --no-cache-dir \
-        clu \
-        "flax>=0.4.1" \
-        "jaxlib>=0.1.65" && \
-    python3 -m uv pip install --no-cache-dir \
-        accelerate \
-        datasets \
-        hf-doc-builder \
-        huggingface-hub \
-        Jinja2 \
-        librosa \
-        numpy==1.26.4 \
-        scipy \
-        tensorboard \
-        transformers \
-        hf_transfer
-
-CMD ["/bin/bash"]
@@ -9,27 +9,29 @@
  - local: stable_diffusion
    title: Basic performance

- title: DiffusionPipeline
+- title: Pipelines
  isExpanded: false
  sections:
  - local: using-diffusers/loading
-    title: Load pipelines
+    title: DiffusionPipeline
  - local: tutorials/autopipeline
    title: AutoPipeline
  - local: using-diffusers/custom_pipeline_overview
-    title: Load community pipelines and components
+    title: Community pipelines and components
  - local: using-diffusers/callback
    title: Pipeline callbacks
  - local: using-diffusers/reusing_seeds
-    title: Reproducible pipelines
+    title: Reproducibility
  - local: using-diffusers/schedulers
    title: Load schedulers and models
+  - local: using-diffusers/models
+    title: Models
  - local: using-diffusers/scheduler_features
    title: Scheduler features
  - local: using-diffusers/other-formats
    title: Model files and layouts
  - local: using-diffusers/push_to_hub
-    title: Push files to the Hub
+    title: Sharing pipelines and models

 - title: Adapters
  isExpanded: false
@@ -58,14 +60,6 @@
    title: Batch inference
  - local: training/distributed_inference
    title: Distributed inference
-  - local: using-diffusers/scheduler_features
-    title: Scheduler features
-  - local: using-diffusers/callback
-    title: Pipeline callbacks
-  - local: using-diffusers/reusing_seeds
-    title: Reproducible pipelines
-  - local: using-diffusers/image_quality
-    title: Controlling image quality

 - title: Inference optimization
  isExpanded: false
@@ -77,7 +71,7 @@
  - local: optimization/memory
    title: Reduce memory usage
  - local: optimization/speed-memory-optims
-    title: Compile and offloading quantized models
+    title: Compiling and offloading quantized models
  - title: Community optimizations
    sections:
    - local: optimization/pruna
@@ -94,6 +88,8 @@
      title: xDiT
    - local: optimization/para_attn
      title: ParaAttention
+    - local: using-diffusers/image_quality
+      title: FreeU

 - title: Hybrid Inference
  isExpanded: false
@@ -190,12 +186,12 @@
    title: torchao
  - local: quantization/quanto
    title: quanto
+  - local: quantization/modelopt
+    title: NVIDIA ModelOpt

 - title: Model accelerators and hardware
  isExpanded: false
  sections:
-  - local: using-diffusers/stable_diffusion_jax_how_to
-    title: JAX/Flax
  - local: optimization/onnx
    title: ONNX
  - local: optimization/open_vino
@@ -20,6 +20,12 @@ All pipelines with [`VaeImageProcessor`] accept PIL Image, PyTorch tensor, or Nu

 [[autodoc]] image_processor.VaeImageProcessor

+## InpaintProcessor
+
+The [`InpaintProcessor`] accepts `mask` and `image` inputs and processes them together. Optionally, it can accept `padding_mask_crop` and apply a mask overlay.
+
+[[autodoc]] image_processor.InpaintProcessor
+
 ## VaeImageProcessorLDM3D

 The [`VaeImageProcessorLDM3D`] accepts RGB and depth inputs and returns RGB and depth outputs.
@@ -44,15 +44,3 @@ model = AutoencoderKL.from_single_file(url)
 ## DecoderOutput

 [[autodoc]] models.autoencoders.vae.DecoderOutput
-
-## FlaxAutoencoderKL
-
-[[autodoc]] FlaxAutoencoderKL
-
-## FlaxAutoencoderKLOutput
-
-[[autodoc]] models.vae_flax.FlaxAutoencoderKLOutput
-
-## FlaxDecoderOutput
-
-[[autodoc]] models.vae_flax.FlaxDecoderOutput
@@ -40,11 +40,3 @@ pipe = StableDiffusionControlNetPipeline.from_single_file(url, controlnet=contro
 ## ControlNetOutput

 [[autodoc]] models.controlnets.controlnet.ControlNetOutput
-
-## FlaxControlNetModel
-
-[[autodoc]] FlaxControlNetModel
-
-## FlaxControlNetOutput
-
-[[autodoc]] models.controlnets.controlnet_flax.FlaxControlNetOutput
@@ -19,10 +19,6 @@ All models are built from the base [`ModelMixin`] class which is a [`torch.nn.Mo
 ## ModelMixin
 [[autodoc]] ModelMixin

-## FlaxModelMixin
-
-[[autodoc]] FlaxModelMixin
-
 ## PushToHubMixin

 [[autodoc]] utils.PushToHubMixin
@@ -23,9 +23,3 @@ The abstract from the paper is:

 ## UNet2DConditionOutput
 [[autodoc]] models.unets.unet_2d_condition.UNet2DConditionOutput
-
-## FlaxUNet2DConditionModel
-[[autodoc]] models.unets.unet_2d_condition_flax.FlaxUNet2DConditionModel
-
-## FlaxUNet2DConditionOutput
-[[autodoc]] models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput
@@ -54,10 +54,6 @@ To check a specific pipeline or model output, refer to its corresponding API doc

 [[autodoc]] pipelines.ImagePipelineOutput

-## FlaxImagePipelineOutput
-
-[[autodoc]] pipelines.pipeline_flax_utils.FlaxImagePipelineOutput
-
 ## AudioPipelineOutput

 [[autodoc]] pipelines.AudioPipelineOutput
@@ -50,7 +50,7 @@ from diffusers.utils import export_to_video
 pipeline_quant_config = PipelineQuantizationConfig(
  quant_backend="torchao",
  quant_kwargs={"quant_type": "int8wo"},
-  components_to_quantize=["transformer"]
+  components_to_quantize="transformer"
 )

 # fp8 layerwise weight-casting
@@ -72,11 +72,3 @@ Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers)

 ## StableDiffusionPipelineOutput
 [[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput
-
-## FlaxStableDiffusionControlNetPipeline
-[[autodoc]] FlaxStableDiffusionControlNetPipeline
-	- all
-	- __call__
-
-## FlaxStableDiffusionControlNetPipelineOutput
-[[autodoc]] pipelines.stable_diffusion.FlaxStableDiffusionPipelineOutput
@@ -54,7 +54,7 @@ pipeline_quant_config = PipelineQuantizationConfig(
      "bnb_4bit_quant_type": "nf4",
      "bnb_4bit_compute_dtype": torch.bfloat16
      },
-    components_to_quantize=["transformer"]
+    components_to_quantize="transformer"
 )

 pipeline = HunyuanVideoPipeline.from_pretrained(
@@ -91,7 +91,7 @@ pipeline_quant_config = PipelineQuantizationConfig(
      "bnb_4bit_quant_type": "nf4",
      "bnb_4bit_compute_dtype": torch.bfloat16
      },
-    components_to_quantize=["transformer"]
+    components_to_quantize="transformer"
 )

 pipeline = HunyuanVideoPipeline.from_pretrained(
@@ -139,7 +139,7 @@ export_to_video(video, "output.mp4", fps=15)
        "bnb_4bit_quant_type": "nf4",
        "bnb_4bit_compute_dtype": torch.bfloat16
        },
-      components_to_quantize=["transformer"]
+      components_to_quantize="transformer"
  )

  pipeline = HunyuanVideoPipeline.from_pretrained(
@@ -106,10 +106,20 @@ The table below lists all the pipelines currently available in 🤗 Diffusers an

 [[autodoc]] pipelines.StableDiffusionMixin.disable_freeu

-## FlaxDiffusionPipeline
-
-[[autodoc]] pipelines.pipeline_flax_utils.FlaxDiffusionPipeline
-
 ## PushToHubMixin

 [[autodoc]] utils.PushToHubMixin
+
+## Callbacks
+
+[[autodoc]] callbacks.PipelineCallback
+
+[[autodoc]] callbacks.SDCFGCutoffCallback
+
+[[autodoc]] callbacks.SDXLCFGCutoffCallback
+
+[[autodoc]] callbacks.SDXLControlnetCFGCutoffCallback
+
+[[autodoc]] callbacks.IPAdapterScaleCutoffCallback
+
+[[autodoc]] callbacks.SD3CFGCutoffCallback
@@ -120,6 +120,16 @@ The `guidance_scale` parameter in the pipeline is there to support future guidan
  - all
  - __call__

+## QwenImageEditInpaintPipeline
+
+[[autodoc]] QwenImageEditInpaintPipeline
+  - all
+  - __call__
+
+## QwenImageControlNetPipeline
+
+[[autodoc]] QwenImageControlNetPipeline
+  - all
+  - __call__
+
 ## QwenImagePipelineOutput

 [[autodoc]] pipelines.qwenimage.pipeline_output.QwenImagePipelineOutput
@@ -1,4 +1,4 @@
-<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
+<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@

 # SkyReels-V2: Infinite-length Film Generative model

-[SkyReels-V2](https://huggingface.co/papers/2504.13074) by the SkyReels Team.
+[SkyReels-V2](https://huggingface.co/papers/2504.13074) by the SkyReels Team from Skywork AI.

 *Recent advances in video generation have been driven by diffusion models and autoregressive frameworks, yet critical challenges persist in harmonizing prompt adherence, visual quality, motion dynamics, and duration: compromises in motion dynamics to enhance temporal visual quality, constrained video duration (5-10 seconds) to prioritize resolution, and inadequate shot-aware generation stemming from general-purpose MLLMs' inability to interpret cinematic grammar, such as shot composition, actor expressions, and camera motions. These intertwined limitations hinder realistic long-form synthesis and professional film-style generation. To address these limitations, we propose SkyReels-V2, an Infinite-length Film Generative Model, that synergizes Multi-modal Large Language Model (MLLM), Multi-stage Pretraining, Reinforcement Learning, and Diffusion Forcing Framework. Firstly, we design a comprehensive structural representation of video that combines the general descriptions by the Multi-modal LLM and the detailed shot language by sub-expert models. Aided with human annotation, we then train a unified Video Captioner, named SkyCaptioner-V1, to efficiently label the video data. Secondly, we establish progressive-resolution pretraining for the fundamental video generation, followed by a four-stage post-training enhancement: Initial concept-balanced Supervised Fine-Tuning (SFT) improves baseline quality; Motion-specific Reinforcement Learning (RL) training with human-annotated and synthetic distortion data addresses dynamic artifacts; Our diffusion forcing framework with non-decreasing noise schedules enables long-video synthesis in an efficient search space; Final high-quality SFT refines visual fidelity. All the code and models are available at [this https URL](https://github.com/SkyworkAI/SkyReels-V2).*

@@ -44,93 +44,113 @@ The following SkyReels-V2 models are supported in Diffusers:

 ### A _Visual_ Demonstration

-        An example with these parameters:
-        base_num_frames=97, num_frames=97, num_inference_steps=30, ar_step=5, causal_block_size=5
+The example below has the following parameters:

-        vae_scale_factor_temporal -> 4
-        num_latent_frames: (97-1)//vae_scale_factor_temporal+1 = 25 frames -> 5 blocks of 5 frames each
+- `base_num_frames=97`
+- `num_frames=97`
+- `num_inference_steps=30`
+- `ar_step=5`
+- `causal_block_size=5`

-        base_num_latent_frames = (97-1)//vae_scale_factor_temporal+1 = 25 → blocks = 25//5 = 5 blocks
-        This 5 blocks means the maximum context length of the model is 25 frames in the latent space.
+With `vae_scale_factor_temporal=4`, expect `5` blocks of `5` frames each as calculated by:

-        Asynchronous Processing Timeline:
-        ┌─────────────────────────────────────────────────────────────────┐
-        │ Steps:    1    6   11   16   21   26   31   36   41   46   50   │
-        │ Block 1: [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■]                       │
-        │ Block 2:      [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■]                  │
-        │ Block 3:           [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■]             │
-        │ Block 4:                [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■]        │
-        │ Block 5:                     [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■]   │
-        └─────────────────────────────────────────────────────────────────┘
+`num_latent_frames: (97-1)//vae_scale_factor_temporal+1 = 25 frames -> 5 blocks of 5 frames each`

-        For Long Videos (num_frames > base_num_frames):
-        base_num_frames acts as the "sliding window size" for processing long videos.
+And the maximum context length in the latent space is calculated with `base_num_latent_frames`:

-        Example: 257-frame video with base_num_frames=97, overlap_history=17
-        ┌──── Iteration 1 (frames 1-97) ────┐
-        │ Processing window: 97 frames      │ → 5 blocks, async processing
-        │ Generates: frames 1-97            │
-        └───────────────────────────────────┘
-                    ┌────── Iteration 2 (frames 81-177) ──────┐
-                    │ Processing window: 97 frames            │
-                    │ Overlap: 17 frames (81-97) from prev    │ → 5 blocks, async processing
-                    │ Generates: frames 98-177                │
-                    └─────────────────────────────────────────┘
-                                ┌────── Iteration 3 (frames 161-257) ──────┐
-                                │ Processing window: 97 frames             │
-                                │ Overlap: 17 frames (161-177) from prev   │ → 5 blocks, async processing
-                                │ Generates: frames 178-257                │
-                                └──────────────────────────────────────────┘
+`base_num_latent_frames = (97-1)//vae_scale_factor_temporal+1 = 25 -> 25//5 = 5 blocks`

-        Each iteration independently runs the asynchronous processing with its own 5 blocks.
-        base_num_frames controls:
-        1. Memory usage (larger window = more VRAM)
-        2. Model context length (must match training constraints)
-        3. Number of blocks per iteration (base_num_latent_frames // causal_block_size)
+Asynchronous Processing Timeline:
+```text
+┌─────────────────────────────────────────────────────────────────┐
+│ Steps:    1    6   11   16   21   26   31   36   41   46   50   │
+│ Block 1: [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■]                       │
+│ Block 2:      [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■]                  │
+│ Block 3:           [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■]             │
+│ Block 4:                [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■]        │
+│ Block 5:                     [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■]   │
+└─────────────────────────────────────────────────────────────────┘
+```

-        Each block takes 30 steps to complete denoising.
-        Block N starts at step: 1 + (N-1) x ar_step
-        Total steps: 30 + (5-1) x 5 = 50 steps
+For Long Videos (`num_frames` > `base_num_frames`):
+`base_num_frames` acts as the "sliding window size" for processing long videos.
+
+Example: `257`-frame video with `base_num_frames=97`, `overlap_history=17`
+```text
+┌──── Iteration 1 (frames 1-97) ────┐
+│ Processing window: 97 frames      │ → 5 blocks,
+│ Generates: frames 1-97            │   async processing
+└───────────────────────────────────┘
+            ┌────── Iteration 2 (frames 81-177) ──────┐
+            │ Processing window: 97 frames            │
+            │ Overlap: 17 frames (81-97) from prev    │ → 5 blocks,
+            │ Generates: frames 98-177                │   async processing
+            └─────────────────────────────────────────┘
+                        ┌────── Iteration 3 (frames 161-257) ──────┐
+                        │ Processing window: 97 frames             │
+                        │ Overlap: 17 frames (161-177) from prev   │ → 5 blocks,
+                        │ Generates: frames 178-257                │   async processing
+                        └──────────────────────────────────────────┘
+```
+
+Each iteration independently runs the asynchronous processing with its own `5` blocks.
+`base_num_frames` controls:
+1. Memory usage (larger window = more VRAM)
+2. Model context length (must match training constraints)
+3. Number of blocks per iteration (`base_num_latent_frames // causal_block_size`)
+
+Each block takes `30` steps to complete denoising.
+Block N starts at step: `1 + (N-1) x ar_step`
+Total steps: `30 + (5-1) x 5 = 50` steps


-        Synchronous mode (ar_step=0) would process all blocks/frames simultaneously:
-        ┌──────────────────────────────────────────────┐
-        │ Steps:       1            ...            30  │
-        │ All blocks: [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■] │
-        └──────────────────────────────────────────────┘
-        Total steps: 30 steps
+Synchronous mode (`ar_step=0`) would process all blocks/frames simultaneously:
+```text
+┌──────────────────────────────────────────────┐
+│ Steps:       1            ...            30  │
+│ All blocks: [■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■] │
+└──────────────────────────────────────────────┘
+```
+Total steps: `30` steps


-        An example on how the step matrix is constructed for asynchronous processing:
-        Given the parameters: (num_inference_steps=30, flow_shift=8, num_frames=97, ar_step=5, causal_block_size=5)
-        - num_latent_frames = (97 frames - 1) // (4 temporal downsampling) + 1 = 25
-        - step_template = [999, 995, 991, 986, 980, 975, 969, 963, 956, 948,
-                           941, 932, 922, 912, 901, 888, 874, 859, 841, 822,
-                           799, 773, 743, 708, 666, 615, 551, 470, 363, 216]
+An example of how the step matrix is constructed for asynchronous processing:
+Given the parameters: (`num_inference_steps=30, flow_shift=8, num_frames=97, ar_step=5, causal_block_size=5`)
+```
+- num_latent_frames = (97 frames - 1) // (4 temporal downsampling) + 1 = 25
+- step_template = [999, 995, 991, 986, 980, 975, 969, 963, 956, 948,
+                   941, 932, 922, 912, 901, 888, 874, 859, 841, 822,
+                   799, 773, 743, 708, 666, 615, 551, 470, 363, 216]
+```

-        The algorithm creates a 50x25 step_matrix where:
-        - Row 1:  [999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999]
-        - Row 2:  [995, 995, 995, 995, 995, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999]
-        - Row 3:  [991, 991, 991, 991, 991, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999]
-        - ...
-        - Row 7:  [969, 969, 969, 969, 969, 995, 995, 995, 995, 995, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999]
-        - ...
-        - Row 21: [799, 799, 799, 799, 799, 888, 888, 888, 888, 888, 941, 941, 941, 941, 941, 975, 975, 975, 975, 975, 999, 999, 999, 999, 999]
-        - ...
-        - Row 35: [  0,   0,   0,   0,   0, 216, 216, 216, 216, 216, 666, 666, 666, 666, 666, 822, 822, 822, 822, 822, 901, 901, 901, 901, 901]
-        - ...
-        - Row 42: [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 551, 551, 551, 551, 551, 773, 773, 773, 773, 773]
-        - ...
-        - Row 50: [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 216, 216, 216, 216, 216]
+The algorithm creates a `50x25` `step_matrix` where:
+```
+- Row 1:  [999×5, 999×5, 999×5, 999×5, 999×5]
+- Row 2:  [995×5, 999×5, 999×5, 999×5, 999×5]
+- Row 3:  [991×5, 999×5, 999×5, 999×5, 999×5]
+- ...
+- Row 7:  [969×5, 995×5, 999×5, 999×5, 999×5]
+- ...
+- Row 21: [799×5, 888×5, 941×5, 975×5, 999×5]
+- ...
+- Row 35: [  0×5, 216×5, 666×5, 822×5, 901×5]
+- ...
+- Row 42: [  0×5,   0×5,   0×5, 551×5, 773×5]
+- ...
+- Row 50: [  0×5,   0×5,   0×5,   0×5, 216×5]
+```

-        Detailed Row 6 Analysis:
-        - step_matrix[5]:       [ 975, 975, 975, 975, 975, 999, 999, 999, 999, 999, 999,  ...,  999]
-        - step_index[5]:        [   6,   6,   6,   6,   6,   1,   1,   1,   1,   1,   0,  ...,    0]
-        - step_update_mask[5]:  [True,True,True,True,True,True,True,True,True,True,False, ...,False]
-        - valid_interval[5]:    (0, 25)
+Detailed Row `6` Analysis:
+```
+- step_matrix[5]:      [ 975×5,  999×5,   999×5,   999×5,   999×5]
+- step_index[5]:       [   6×5,    1×5,     0×5,     0×5,     0×5]
+- step_update_mask[5]: [True×5, True×5, False×5, False×5, False×5]
+- valid_interval[5]:   (0, 25)
+```
+
+Key Pattern: Block `i` lags behind Block `i-1` by exactly `ar_step=5` timesteps, creating the
+staggered "diffusion forcing" effect where later blocks condition on cleaner earlier blocks.

-        Key Pattern: Block i lags behind Block i-1 by exactly ar_step=5 timesteps, creating the
-        staggered "diffusion forcing" effect where later blocks condition on cleaner earlier blocks.

 ### Text-to-Video Generation

@@ -145,23 +165,22 @@ From the original repo:
 >You can use --ar_step 5 to enable asynchronous inference. When asynchronous inference, --causal_block_size 5 is recommended while it is not supposed to be set for synchronous generation... Asynchronous inference will take more steps to diffuse the whole sequence which means it will be SLOWER than synchronous mode. In our experiments, asynchronous inference may improve the instruction following and visual consistent performance.

 ```py
-# pip install ftfy
 import torch
 from diffusers import AutoModel, SkyReelsV2DiffusionForcingPipeline, UniPCMultistepScheduler
 from diffusers.utils import export_to_video

-vae = AutoModel.from_pretrained("Skywork/SkyReels-V2-DF-14B-540P-Diffusers", subfolder="vae", torch_dtype=torch.float32)
-transformer = AutoModel.from_pretrained("Skywork/SkyReels-V2-DF-14B-540P-Diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
+
+model_id = "Skywork/SkyReels-V2-DF-1.3B-540P-Diffusers"
+vae = AutoModel.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)

 pipeline = SkyReelsV2DiffusionForcingPipeline.from_pretrained(
-    "Skywork/SkyReels-V2-DF-14B-540P-Diffusers",
+    model_id,
    vae=vae,
-    transformer=transformer,
-    torch_dtype=torch.bfloat16
+    torch_dtype=torch.bfloat16,
 )
+pipeline.to("cuda")
 flow_shift = 8.0  # 8.0 for T2V, 5.0 for I2V
 pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config, flow_shift=flow_shift)
-pipeline = pipeline.to("cuda")

 prompt = "A cat and a dog baking a cake together in a kitchen. The cat is carefully measuring flour, while the dog is stirring the batter with a wooden spoon. The kitchen is cozy, with sunlight streaming through the window."

@@ -177,7 +196,7 @@ output = pipeline(
    overlap_history=None,  # Number of frames to overlap for smooth transitions in long videos; 17 for long video generations
    addnoise_condition=20,  # Improves consistency in long video generation
 ).frames[0]
-export_to_video(output, "T2V.mp4", fps=24, quality=8)
+export_to_video(output, "video.mp4", fps=24, quality=8)
 ```

 </hfoption>
@@ -198,14 +217,14 @@ from diffusers import AutoencoderKLWan, SkyReelsV2DiffusionForcingImageToVideoPi
 from diffusers.utils import export_to_video, load_image


-model_id = "Skywork/SkyReels-V2-DF-14B-720P-Diffusers"
+model_id = "Skywork/SkyReels-V2-DF-1.3B-720P-Diffusers"
 vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
 pipeline = SkyReelsV2DiffusionForcingImageToVideoPipeline.from_pretrained(
    model_id, vae=vae, torch_dtype=torch.bfloat16
 )
+pipeline.to("cuda")
 flow_shift = 5.0  # 8.0 for T2V, 5.0 for I2V
 pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config, flow_shift=flow_shift)
-pipeline.to("cuda")

 first_frame = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_first_frame.png")
 last_frame = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_last_frame.png")
@@ -239,7 +258,7 @@ prompt = "CG animation style, a small blue bird takes off from the ground, flapp
 output = pipeline(
    image=first_frame, last_image=last_frame, prompt=prompt, height=height, width=width, guidance_scale=5.0
 ).frames[0]
-export_to_video(output, "output.mp4", fps=24, quality=8)
+export_to_video(output, "video.mp4", fps=24, quality=8)
 ```

 </hfoption>
@@ -261,75 +280,35 @@ from diffusers import AutoencoderKLWan, SkyReelsV2DiffusionForcingVideoToVideoPi
 from diffusers.utils import export_to_video, load_video


-model_id = "Skywork/SkyReels-V2-DF-14B-540P-Diffusers"
+model_id = "Skywork/SkyReels-V2-DF-1.3B-720P-Diffusers"
 vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
 pipeline = SkyReelsV2DiffusionForcingVideoToVideoPipeline.from_pretrained(
    model_id, vae=vae, torch_dtype=torch.bfloat16
 )
+pipeline.to("cuda")
 flow_shift = 5.0  # 8.0 for T2V, 5.0 for I2V
 pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config, flow_shift=flow_shift)
-pipeline.to("cuda")

 video = load_video("input_video.mp4")

 prompt = "CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective."

 output = pipeline(
-    video=video, prompt=prompt, height=544, width=960, guidance_scale=5.0,
-    num_inference_steps=30, num_frames=257, base_num_frames=97#, ar_step=5, causal_block_size=5,
+    video=video, prompt=prompt, height=720, width=1280, guidance_scale=5.0, overlap_history=17,
+    num_inference_steps=30, num_frames=257, base_num_frames=121#, ar_step=5, causal_block_size=5,
 ).frames[0]
-export_to_video(output, "output.mp4", fps=24, quality=8)
-# Total frames will be the number of frames of given video + 257
+export_to_video(output, "video.mp4", fps=24, quality=8)
+# Total frames will be the number of frames of the given video + 257
 ```

 </hfoption>
 </hfoptions>

-
 ## Notes

 - SkyReels-V2 supports LoRAs with [`~loaders.SkyReelsV2LoraLoaderMixin.load_lora_weights`].

-  <details>
-  <summary>Show example code</summary>
-
-  ```py
-  # pip install ftfy
-  import torch
-  from diffusers import AutoModel, SkyReelsV2DiffusionForcingPipeline
-  from diffusers.utils import export_to_video
-
-  vae = AutoModel.from_pretrained(
-      "Skywork/SkyReels-V2-DF-1.3B-540P-Diffusers", subfolder="vae", torch_dtype=torch.float32
-  )
-  pipeline = SkyReelsV2DiffusionForcingPipeline.from_pretrained(
-      "Skywork/SkyReels-V2-DF-1.3B-540P-Diffusers", vae=vae, torch_dtype=torch.bfloat16
-  )
-  pipeline.to("cuda")
-
-  pipeline.load_lora_weights("benjamin-paine/steamboat-willie-1.3b", adapter_name="steamboat-willie")
-  pipeline.set_adapters("steamboat-willie")
-
-  pipeline.enable_model_cpu_offload()
-
-  # use "steamboat willie style" to trigger the LoRA
-  prompt = """
-  steamboat willie style, golden era animation, The camera rushes from far to near in a low-angle shot,
-  revealing a white ferret on a log. It plays, leaps into the water, and emerges, as the camera zooms in
-  for a close-up. Water splashes berry bushes nearby, while moss, snow, and leaves blanket the ground.
-  Birch trees and a light blue sky frame the scene, with ferns in the foreground. Side lighting casts dynamic
-  shadows and warm highlights. Medium composition, front view, low angle, with depth of field.
-  """
-
-  output = pipeline(
-      prompt=prompt,
-      num_frames=97,
-      guidance_scale=6.0,
-  ).frames[0]
-  export_to_video(output, "output.mp4", fps=24)
-  ```
-
-  </details>
+`SkyReelsV2Pipeline` and `SkyReelsV2ImageToVideoPipeline` are also available without the Diffusion Forcing framework applied.


 ## SkyReelsV2DiffusionForcingPipeline
@@ -364,4 +343,4 @@ export_to_video(output, "output.mp4", fps=24, quality=8)

 ## SkyReelsV2PipelineOutput

-[[autodoc]] pipelines.skyreels_v2.pipeline_output.SkyReelsV2PipelineOutput
+[[autodoc]] pipelines.skyreels_v2.pipeline_output.SkyReelsV2PipelineOutput
@@ -47,13 +47,3 @@ Make sure to check out the Stable Diffusion [Tips](overview#tips) section to lea
 ## StableDiffusionPipelineOutput

 [[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput
-
-## FlaxStableDiffusionImg2ImgPipeline
-
-[[autodoc]] FlaxStableDiffusionImg2ImgPipeline
-	- all
-	- __call__
-
-## FlaxStableDiffusionPipelineOutput
-
-[[autodoc]] pipelines.stable_diffusion.FlaxStableDiffusionPipelineOutput
@@ -49,13 +49,3 @@ If you're interested in using one of the official checkpoints for a task, explor
 ## StableDiffusionPipelineOutput

 [[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput
-
-## FlaxStableDiffusionInpaintPipeline
-
-[[autodoc]] FlaxStableDiffusionInpaintPipeline
-	- all
-	- __call__
-
-## FlaxStableDiffusionPipelineOutput
-
-[[autodoc]] pipelines.stable_diffusion.FlaxStableDiffusionPipelineOutput
@@ -51,13 +51,3 @@ If you're interested in using one of the official checkpoints for a task, explor
 ## StableDiffusionPipelineOutput

 [[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput
-
-## FlaxStableDiffusionPipeline
-
-[[autodoc]] FlaxStableDiffusionPipeline
-	- all
-	- __call__
-
-## FlaxStableDiffusionPipelineOutput
-
-[[autodoc]] pipelines.stable_diffusion.FlaxStableDiffusionPipelineOutput
@@ -20,7 +20,7 @@
  </div>
 </div>

-# Wan2.1
+# Wan

 [Wan-2.1](https://huggingface.co/papers/2503.20314) by the Wan Team.

@@ -42,7 +42,7 @@ The following Wan models are supported in Diffusers:
 - [Wan 2.2 TI2V 5B](https://huggingface.co/Wan-AI/Wan2.2-TI2V-5B-Diffusers)

 > [!TIP]
-> Click on the Wan2.1 models in the right sidebar for more examples of video generation.
+> Click on the Wan models in the right sidebar for more examples of video generation.

 ### Text-to-Video Generation

@@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License.

 # Installation

-Diffusers is tested on Python 3.8+, PyTorch 1.4+, and Flax 0.4.1+. Follow the installation instructions for the deep learning library you're using, [PyTorch](https://pytorch.org/get-started/locally/) or [Flax](https://flax.readthedocs.io/en/latest/).
+Diffusers is tested on Python 3.8+ and PyTorch 1.4+. Install [PyTorch](https://pytorch.org/get-started/locally/) according to your system and setup.

 Create a [virtual environment](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/) for easier management of separate projects and to avoid compatibility issues between dependencies. Use [uv](https://docs.astral.sh/uv/), a Rust-based Python package and project manager, to create a virtual environment and install Diffusers.

@@ -32,12 +32,6 @@ PyTorch only supports Python 3.8 - 3.11 on Windows.
 uv pip install diffusers["torch"] transformers
 ```

-Use the command below for Flax.
-
-```bash
-uv pip install diffusers["flax"] transformers
-```
-
 </hfoption>
 <hfoption id="conda">

@@ -71,27 +65,12 @@ An editable install is recommended for development workflows or if you're using

 Clone the repository and install Diffusers with the following commands.

-<hfoptions id="editable">
-<hfoption id="PyTorch">
-
 ```bash
 git clone https://github.com/huggingface/diffusers.git
 cd diffusers
 uv pip install -e ".[torch]"
 ```

-</hfoption>
-<hfoption id="Flax">
-
-```bash
-git clone https://github.com/huggingface/diffusers.git
-cd diffusers
-uv pip install -e ".[flax]"
-```
-
-</hfoption>
-</hfoptions>
-
 > [!WARNING]
 > You must keep the `diffusers` folder if you want to keep using the library with the editable install.

@@ -140,7 +119,7 @@ For more details about managing and cleaning the cache, take a look at the [Unde
 ## Telemetry logging

 Diffusers gathers telemetry information during [`~DiffusionPipeline.from_pretrained`] requests.
-The data gathered includes the Diffusers and PyTorch/Flax version, the requested model or pipeline class,
+The data gathered includes the Diffusers and PyTorch version, the requested model or pipeline class,
 and the path to a pretrained checkpoint if it is hosted on the Hub.

 This usage data helps us debug issues and prioritize new features.
@@ -209,7 +209,7 @@ There is also a [compile_regions](https://github.com/huggingface/accelerate/blob
 # pip install -U accelerate
 import torch
 from diffusers import StableDiffusionXLPipeline
-from accelerate.utils import compile regions
+from accelerate.utils import compile_regions

 pipeline = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
@@ -291,13 +291,53 @@ Group offloading moves groups of internal layers ([torch.nn.ModuleList](https://
 > [!WARNING]
 > Group offloading may not work with all models if the forward implementation contains weight-dependent device casting of inputs because it may clash with group offloading's device casting mechanism.

-Call [`~ModelMixin.enable_group_offload`] to enable it for standard Diffusers model components that inherit from [`ModelMixin`]. For other model components that don't inherit from [`ModelMixin`], such as a generic [torch.nn.Module](https://pytorch.org/docs/stable/generated/torch.nn.Module.html), use [`~hooks.apply_group_offloading`] instead.
-
-The `offload_type` parameter can be set to `block_level` or `leaf_level`.
+Enable group offloading by setting the `offload_type` parameter to `block_level` or `leaf_level`.

 - `block_level` offloads groups of layers based on the `num_blocks_per_group` parameter. For example, if `num_blocks_per_group=2` on a model with 40 layers, 2 layers are onloaded and offloaded at a time (20 total onloads/offloads). This drastically reduces memory requirements.
 - `leaf_level` offloads individual layers at the lowest level and is equivalent to [CPU offloading](#cpu-offloading). But it can be made faster if you use streams without giving up inference speed.

+Group offloading is supported for entire pipelines or individual models. Applying group offloading to the entire pipeline is the easiest option while selectively applying it to individual models gives users more flexibility to use different offloading techniques for different models.
+
+<hfoptions id="group-offloading">
+<hfoption id="pipeline">
+
+Call [`~DiffusionPipeline.enable_group_offload`] on a pipeline.
+
+```py
+import torch
+from diffusers import CogVideoXPipeline
+from diffusers.hooks import apply_group_offloading
+from diffusers.utils import export_to_video
+
+onload_device = torch.device("cuda")
+offload_device = torch.device("cpu")
+
+pipeline = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16)
+pipeline.enable_group_offload(
+    onload_device=onload_device,
+    offload_device=offload_device,
+    offload_type="leaf_level",
+    use_stream=True
+)
+
+prompt = (
+    "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. "
+    "The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other "
+    "pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, "
+    "casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. "
+    "The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical "
+    "atmosphere of this unique musical performance."
+)
+video = pipeline(prompt=prompt, guidance_scale=6, num_inference_steps=50).frames[0]
+print(f"Max memory reserved: {torch.cuda.max_memory_allocated() / 1024**3:.2f} GB")
+export_to_video(video, "output.mp4", fps=8)
+```
+
+</hfoption>
+<hfoption id="model">
+
+Call [`~ModelMixin.enable_group_offload`] on standard Diffusers model components that inherit from [`ModelMixin`]. For other model components that don't inherit from [`ModelMixin`], such as a generic [torch.nn.Module](https://pytorch.org/docs/stable/generated/torch.nn.Module.html), use [`~hooks.apply_group_offloading`] instead.
+
 ```py
 import torch
 from diffusers import CogVideoXPipeline
@@ -328,6 +368,9 @@ print(f"Max memory reserved: {torch.cuda.max_memory_allocated() / 1024**3:.2f} G
 export_to_video(video, "output.mp4", fps=8)
 ```

+</hfoption>
+</hfoptions>
+
 #### CUDA stream

 The `use_stream` parameter can be activated for CUDA devices that support asynchronous data transfer streams to reduce overall execution time compared to [CPU offloading](#cpu-offloading). It overlaps data transfer and computation by using layer prefetching. The next layer to be executed is loaded onto the GPU while the current layer is still being executed. It can increase CPU memory significantly so ensure you have 2x the amount of memory as the model size.
@@ -10,7 +10,7 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
 specific language governing permissions and limitations under the License.
 -->

-# Compile and offloading quantized models
+# Compiling and offloading quantized models

 Optimizing models often involves trade-offs between [inference speed](./fp16) and [memory-usage](./memory). For instance, while [caching](./cache) can boost inference speed, it also increases memory consumption since it needs to store the outputs of intermediate attention layers. A more balanced optimization strategy combines quantizing a model, [torch.compile](./fp16#torchcompile) and various [offloading methods](./memory#offloading).

@@ -28,7 +28,8 @@ The table below provides a comparison of optimization strategy combinations and
 | quantization  | 32.602 | 14.9453 |
 | quantization, torch.compile  | 25.847 | 14.9448 |
 | quantization, torch.compile, model CPU offloading | 32.312 | 12.2369 |
-<small>These results are benchmarked on Flux with a RTX 4090. The transformer and text_encoder components are quantized. Refer to the [benchmarking script](https://gist.github.com/sayakpaul/0db9d8eeeb3d2a0e5ed7cf0d9ca19b7d) if you're interested in evaluating your own model.</small>
+
+<small>These results are benchmarked on Flux with an RTX 4090. The transformer and text_encoder components are quantized. Refer to the <a href="https://gist.github.com/sayakpaul/0db9d8eeeb3d2a0e5ed7cf0d9ca19b7d">benchmarking script</a> if you're interested in evaluating your own model.</small>

 This guide will show you how to compile and offload a quantized model with [bitsandbytes](../quantization/bitsandbytes#torchcompile). Make sure you are using [PyTorch nightly](https://pytorch.org/get-started/locally/) and the latest version of bitsandbytes.

@@ -0,0 +1,141 @@
+<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License. -->
+
+# NVIDIA ModelOpt
+
+[NVIDIA-ModelOpt](https://github.com/NVIDIA/TensorRT-Model-Optimizer) is a unified library of state-of-the-art model optimization techniques like quantization, pruning, distillation, speculative decoding, etc. It compresses deep learning models for downstream deployment frameworks like TensorRT-LLM or TensorRT to optimize inference speed.
+
+Before you begin, make sure you have nvidia_modelopt installed.
+
+```bash
+pip install -U "nvidia_modelopt[hf]"
+```
+
+Quantize a model by passing [`NVIDIAModelOptConfig`] to [`~ModelMixin.from_pretrained`] (you can also load pre-quantized models). This works for any model in any modality, as long as it supports loading with [Accelerate](https://hf.co/docs/accelerate/index) and contains `torch.nn.Linear` layers.
+
+The example below only quantizes the weights to FP8.
+
+```python
+import torch
+from diffusers import AutoModel, SanaPipeline, NVIDIAModelOptConfig
+
+model_id = "Efficient-Large-Model/Sana_600M_1024px_diffusers"
+dtype = torch.bfloat16
+
+quantization_config = NVIDIAModelOptConfig(quant_type="FP8", quant_method="modelopt")
+transformer = AutoModel.from_pretrained(
+    model_id,
+    subfolder="transformer",
+    quantization_config=quantization_config,
+    torch_dtype=dtype,
+)
+pipe = SanaPipeline.from_pretrained(
+    model_id,
+    transformer=transformer,
+    torch_dtype=dtype,
+)
+pipe.to("cuda")
+
+print(f"Pipeline memory usage: {torch.cuda.max_memory_reserved() / 1024**3:.3f} GB")
+
+prompt = "A cat holding a sign that says hello world"
+image = pipe(
+    prompt, num_inference_steps=50, guidance_scale=4.5, max_sequence_length=512
+).images[0]
+image.save("output.png")
+```
+
+> **Note:**
+>
+> The quantization methods in NVIDIA-ModelOpt are designed to reduce the memory footprint of model weights using various QAT (Quantization-Aware Training) and PTQ (Post-Training Quantization) techniques while maintaining model performance. However, the actual performance gain during inference depends on the deployment framework (e.g., TRT-LLM, TensorRT) and the specific hardware configuration.  
+> 
+> More details can be found [here](https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/examples).
+
+## NVIDIAModelOptConfig
+
+The `NVIDIAModelOptConfig` class accepts the following parameters:
+- `quant_type`: A string value mentioning one of the quantization types below.
+- `modules_to_not_convert`: A list of full/partial module names for which quantization should not be performed. For example, to not perform any quantization of the [`SD3Transformer2DModel`]'s pos_embed projection blocks, one would specify: `modules_to_not_convert=["pos_embed.proj.weight"]`.
+- `disable_conv_quantization`: A boolean value which when set to `True` disables quantization for all convolutional layers in the model. This is useful as channel and block quantization generally don't work well with convolutional layers (used with INT4, NF4, NVFP4). If you want to disable quantization for specific convolutional layers, use `modules_to_not_convert` instead.
+- `algorithm`: The algorithm to use for determining scale, defaults to `"max"`. You can check modelopt documentation for more algorithms and details.
+- `forward_loop`: The forward loop function to use for calibrating activation during quantization. If not provided, it relies on static scale values computed using the weights only.
+- `kwargs`: A dict of keyword arguments to pass to the underlying quantization method which will be invoked based on `quant_type`.
+
+## Supported quantization types
+
+ModelOpt supports weight-only, channel, and block quantization for int8, fp8, int4, nf4, and nvfp4. The quantization methods are designed to reduce the memory footprint of the model weights while maintaining the performance of the model during inference.
+
+Weight-only quantization stores the model weights in a specific low-bit data type but performs computation with a higher-precision data type, like `bfloat16`. This lowers the memory requirements from model weights but retains the memory peaks for activation computation.
+
+The quantization methods supported are as follows:
+
+| **Quantization Type** | **Supported Schemes** | **Required Kwargs** | **Additional Notes** |
+|-----------------------|-----------------------|---------------------|----------------------|
+| **INT8** | `int8 weight only`, `int8 channel quantization`, `int8 block quantization` | `quant_type`, `quant_type + channel_quantize`, `quant_type + channel_quantize + block_quantize` |
+| **FP8** | `fp8 weight only`, `fp8 channel quantization`, `fp8 block quantization` | `quant_type`, `quant_type + channel_quantize`, `quant_type + channel_quantize + block_quantize` |
+| **INT4** | `int4 weight only`, `int4 block quantization` | `quant_type`, `quant_type + channel_quantize + block_quantize` | `channel_quantize = -1 is only supported for now`|
+| **NF4** | `nf4 weight only`, `nf4 double block quantization` | `quant_type`, `quant_type + channel_quantize + block_quantize + scale_channel_quantize + scale_block_quantize` | `channel_quantize = -1 and scale_channel_quantize = -1 are only supported for now` |
+| **NVFP4** | `nvfp4 weight only`, `nvfp4 block quantization` | `quant_type`, `quant_type + channel_quantize + block_quantize` | `channel_quantize = -1 is only supported for now`|
+
+
+Refer to the [official modelopt documentation](https://nvidia.github.io/TensorRT-Model-Optimizer/) for a better understanding of the available quantization methods and the exhaustive list of configuration options available.
+
+## Serializing and deserializing quantized models
+
+To serialize a quantized model in a given dtype, first load the model with the desired quantization dtype and then save it using the [`~ModelMixin.save_pretrained`] method.
+
+```python
+import torch
+from diffusers import AutoModel, NVIDIAModelOptConfig
+from modelopt.torch.opt import enable_huggingface_checkpointing
+
+enable_huggingface_checkpointing()
+
+model_id = "Efficient-Large-Model/Sana_600M_1024px_diffusers"
+quant_config_fp8 = {"quant_type": "FP8", "quant_method": "modelopt"}
+quant_config_fp8 = NVIDIAModelOptConfig(**quant_config_fp8)
+model = AutoModel.from_pretrained(
+    model_id,
+    subfolder="transformer",
+    quantization_config=quant_config_fp8,
+    torch_dtype=torch.bfloat16,
+)
+model.save_pretrained('path/to/sana_fp8', safe_serialization=False)
+```
+
+To load a serialized quantized model, use the [`~ModelMixin.from_pretrained`] method.
+
+```python
+import torch
+from diffusers import AutoModel, NVIDIAModelOptConfig, SanaPipeline
+from modelopt.torch.opt import enable_huggingface_checkpointing
+
+enable_huggingface_checkpointing()
+
+quantization_config = NVIDIAModelOptConfig(quant_type="FP8", quant_method="modelopt")
+transformer = AutoModel.from_pretrained(
+    "path/to/sana_fp8",
+    subfolder="transformer",
+    quantization_config=quantization_config,
+    torch_dtype=torch.bfloat16,
+)
+pipe = SanaPipeline.from_pretrained(
+    "Efficient-Large-Model/Sana_600M_1024px_diffusers",
+    transformer=transformer,
+    torch_dtype=torch.bfloat16,
+)
+pipe.to("cuda")
+prompt = "A cat holding a sign that says hello world"
+image = pipe(
+    prompt, num_inference_steps=50, guidance_scale=4.5, max_sequence_length=512
+).images[0]
+image.save("output.png")
+```
@@ -34,7 +34,9 @@ Initialize [`~quantizers.PipelineQuantizationConfig`] with the following paramet
 > [!TIP]
 > These `quant_kwargs` arguments are different for each backend. Refer to the [Quantization API](../api/quantization) docs to view the arguments for each backend.

- `components_to_quantize` specifies which components of the pipeline to quantize. Typically, you should quantize the most compute intensive components like the transformer. The text encoder is another component to consider quantizing if a pipeline has more than one such as [`FluxPipeline`]. The example below quantizes the T5 text encoder in [`FluxPipeline`] while keeping the CLIP model intact.
+- `components_to_quantize` specifies which component(s) of the pipeline to quantize. Typically, you should quantize the most compute-intensive components like the transformer. The text encoder is another component to consider quantizing if a pipeline has more than one, such as [`FluxPipeline`]. The example below quantizes the T5 text encoder in [`FluxPipeline`] while keeping the CLIP model intact.
+
+   `components_to_quantize` accepts either a list for multiple models or a string for a single model.

 The example below loads the bitsandbytes backend with the following arguments from [`~quantizers.quantization_config.BitsAndBytesConfig`], `load_in_4bit`, `bnb_4bit_quant_type`, and `bnb_4bit_compute_dtype`.

@@ -62,6 +64,7 @@ pipe = DiffusionPipeline.from_pretrained(
 image = pipe("photo of a cute dog").images[0]
 ```

+
 ### Advanced quantization

 The `quant_mapping` argument provides more options for how to quantize each individual component in a pipeline, like combining different quantization backends.
@@ -14,7 +14,7 @@ specific language governing permissions and limitations under the License.

 [ControlNet](https://hf.co/papers/2302.05543) models are adapters trained on top of another pretrained model. It allows for a greater degree of control over image generation by conditioning the model with an additional input image. The input image can be a canny edge, depth map, human pose, and many more.

-If you're training on a GPU with limited vRAM, you should try enabling the `gradient_checkpointing`, `gradient_accumulation_steps`, and `mixed_precision` parameters in the training command. You can also reduce your memory footprint by using memory-efficient attention with [xFormers](../optimization/xformers). JAX/Flax training is also supported for efficient training on TPUs and GPUs, but it doesn't support gradient checkpointing or xFormers. You should have a GPU with >30GB of memory if you want to train faster with Flax.
+If you're training on a GPU with limited vRAM, you should try enabling the `gradient_checkpointing`, `gradient_accumulation_steps`, and `mixed_precision` parameters in the training command. You can also reduce your memory footprint by using memory-efficient attention with [xFormers](../optimization/xformers).

 This guide will explore the [train_controlnet.py](https://github.com/huggingface/diffusers/blob/main/examples/controlnet/train_controlnet.py) training script to help you become familiar with it, and how you can adapt it for your own use-case.

@@ -28,45 +28,10 @@ pip install .

 Then navigate to the example folder containing the training script and install the required dependencies for the script you're using:

-<hfoptions id="installation">
-<hfoption id="PyTorch">
 ```bash
 cd examples/controlnet
 pip install -r requirements.txt
 ```
-</hfoption>
-<hfoption id="Flax">
-
-If you have access to a TPU, the Flax training script runs even faster! Let's run the training script on the [Google Cloud TPU VM](https://cloud.google.com/tpu/docs/run-calculation-jax). Create a single TPU v4-8 VM and connect to it:
-
-```bash
-ZONE=us-central2-b
-TPU_TYPE=v4-8
-VM_NAME=hg_flax
-
-gcloud alpha compute tpus tpu-vm create $VM_NAME \
- --zone $ZONE \
- --accelerator-type $TPU_TYPE \
- --version  tpu-vm-v4-base
-
-gcloud alpha compute tpus tpu-vm ssh $VM_NAME --zone $ZONE -- \
-```
-
-Install JAX 0.4.5:
-
-```bash
-pip install "jax[tpu]==0.4.5" -f https://storage.googleapis.com/jax-releases/libtpu_releases.html
-```
-
-Then install the required dependencies for the Flax script:
-
-```bash
-cd examples/controlnet
-pip install -r requirements_flax.txt
-```
-
-</hfoption>
-</hfoptions>

 <Tip>

@@ -120,7 +85,7 @@ Many of the basic and important parameters are described in the [Text-to-image](

 ### Min-SNR weighting

-The [Min-SNR](https://huggingface.co/papers/2303.09556) weighting strategy can help with training by rebalancing the loss to achieve faster convergence. The training script supports predicting `epsilon` (noise) or `v_prediction`, but Min-SNR is compatible with both prediction types. This weighting strategy is only supported by PyTorch and is unavailable in the Flax training script.
+The [Min-SNR](https://huggingface.co/papers/2303.09556) weighting strategy can help with training by rebalancing the loss to achieve faster convergence. The training script supports predicting `epsilon` (noise) or `v_prediction`, but Min-SNR is compatible with both prediction types. This weighting strategy is only supported by PyTorch.

 Add the `--snr_gamma` parameter and set it to the recommended value of 5.0:

@@ -272,9 +237,6 @@ That's it! You don't need to add any additional parameters to your training comm
 </hfoption>
 </hfoptions>

-<hfoptions id="training-inference">
-<hfoption id="PyTorch">
-
 ```bash
 export MODEL_DIR="stable-diffusion-v1-5/stable-diffusion-v1-5"
 export OUTPUT_DIR="path/to/save/model"
@@ -292,47 +254,6 @@ accelerate launch train_controlnet.py \
 --push_to_hub
 ```

-</hfoption>
-<hfoption id="Flax">
-
-With Flax, you can [profile your code](https://jax.readthedocs.io/en/latest/profiling.html) by adding the `--profile_steps==5` parameter to your training command. Install the Tensorboard profile plugin:
-
-```bash
-pip install tensorflow tensorboard-plugin-profile
-tensorboard --logdir runs/fill-circle-100steps-20230411_165612/
-```
-
-Then you can inspect the profile at [http://localhost:6006/#profile](http://localhost:6006/#profile).
-
-<Tip warning={true}>
-
-If you run into version conflicts with the plugin, try uninstalling and reinstalling all versions of TensorFlow and Tensorboard. The debugging functionality of the profile plugin is still experimental, and not all views are fully functional. The `trace_viewer` cuts off events after 1M, which can result in all your device traces getting lost if for example, you profile the compilation step by accident.
-
-</Tip>
-
-```bash
-python3 train_controlnet_flax.py \
- --pretrained_model_name_or_path=$MODEL_DIR \
- --output_dir=$OUTPUT_DIR \
- --dataset_name=fusing/fill50k \
- --resolution=512 \
- --learning_rate=1e-5 \
- --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
- --validation_prompt "red circle with blue background" "cyan circle with brown floral background" \
- --validation_steps=1000 \
- --train_batch_size=2 \
- --revision="non-ema" \
- --from_pt \
- --report_to="wandb" \
- --tracker_project_name=$HUB_MODEL_ID \
- --num_train_epochs=11 \
- --push_to_hub \
- --hub_model_id=$HUB_MODEL_ID
-```
-
-</hfoption>
-</hfoptions>
-
 Once training is complete, you can use your newly trained model for inference!

 ```py
@@ -223,7 +223,7 @@ from diffusers.image_processor import VaeImageProcessor
 import torch 

 vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=torch.bfloat16).to("cuda")
-vae_scale_factor = 2 ** (len(vae.config.block_out_channels))
+vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)
 image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)

 with torch.no_grad():
@@ -14,7 +14,7 @@ specific language governing permissions and limitations under the License.

 [DreamBooth](https://huggingface.co/papers/2208.12242) is a training technique that updates the entire diffusion model by training on just a few images of a subject or style. It works by associating a special word in the prompt with the example images.

-If you're training on a GPU with limited vRAM, you should try enabling the `gradient_checkpointing` and `mixed_precision` parameters in the training command. You can also reduce your memory footprint by using memory-efficient attention with [xFormers](../optimization/xformers). JAX/Flax training is also supported for efficient training on TPUs and GPUs, but it doesn't support gradient checkpointing or xFormers. You should have a GPU with >30GB of memory if you want to train faster with Flax.
+If you're training on a GPU with limited vRAM, you should try enabling the `gradient_checkpointing` and `mixed_precision` parameters in the training command. You can also reduce your memory footprint by using memory-efficient attention with [xFormers](../optimization/xformers).

 This guide will explore the [train_dreambooth.py](https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth.py) script to help you become more familiar with it, and how you can adapt it for your own use-case.

@@ -28,25 +28,11 @@ pip install .

 Navigate to the example folder with the training script and install the required dependencies for the script you're using:

-<hfoptions id="installation">
-<hfoption id="PyTorch">
-
 ```bash
 cd examples/dreambooth
 pip install -r requirements.txt
 ```

-</hfoption>
-<hfoption id="Flax">
-
-```bash
-cd examples/dreambooth
-pip install -r requirements_flax.txt
-```
-
-</hfoption>
-</hfoptions>
-
 <Tip>

 🤗 Accelerate is a library for helping you train on multiple GPUs/TPUs or with mixed-precision. It'll automatically configure your training setup based on your hardware and environment. Take a look at the 🤗 Accelerate [Quick tour](https://huggingface.co/docs/accelerate/quicktour) to learn more.
@@ -110,7 +96,7 @@ Some basic and important parameters to know and specify are:

 ### Min-SNR weighting

-The [Min-SNR](https://huggingface.co/papers/2303.09556) weighting strategy can help with training by rebalancing the loss to achieve faster convergence. The training script supports predicting `epsilon` (noise) or `v_prediction`, but Min-SNR is compatible with both prediction types. This weighting strategy is only supported by PyTorch and is unavailable in the Flax training script.
+The [Min-SNR](https://huggingface.co/papers/2303.09556) weighting strategy can help with training by rebalancing the loss to achieve faster convergence. The training script supports predicting `epsilon` (noise) or `v_prediction`, but Min-SNR is compatible with both prediction types. This weighting strategy is only supported by PyTorch.

 Add the `--snr_gamma` parameter and set it to the recommended value of 5.0:

@@ -311,9 +297,6 @@ That's it! You don't need to add any additional parameters to your training comm
 </hfoption>
 </hfoptions>

-<hfoptions id="training-inference">
-<hfoption id="PyTorch">
-
 ```bash
 export MODEL_NAME="stable-diffusion-v1-5/stable-diffusion-v1-5"
 export INSTANCE_DIR="./dog"
@@ -334,29 +317,6 @@ accelerate launch train_dreambooth.py \
  --push_to_hub
 ```

-</hfoption>
-<hfoption id="Flax">
-
-```bash
-export MODEL_NAME="duongna/stable-diffusion-v1-4-flax"
-export INSTANCE_DIR="./dog"
-export OUTPUT_DIR="path-to-save-model"
-
-python train_dreambooth_flax.py \
-  --pretrained_model_name_or_path=$MODEL_NAME  \
-  --instance_data_dir=$INSTANCE_DIR \
-  --output_dir=$OUTPUT_DIR \
-  --instance_prompt="a photo of sks dog" \
-  --resolution=512 \
-  --train_batch_size=1 \
-  --learning_rate=5e-6 \
-  --max_train_steps=400 \
-  --push_to_hub
-```
-
-</hfoption>
-</hfoptions>
-
 Once training is complete, you can use your newly trained model for inference!

 <Tip>
@@ -383,9 +343,6 @@ image.save("dog-bucket.png")

 </Tip>

-<hfoptions id="training-inference">
-<hfoption id="PyTorch">
-
 ```py
 from diffusers import DiffusionPipeline
 import torch
@@ -395,39 +352,6 @@ image = pipeline("A photo of sks dog in a bucket", num_inference_steps=50, guida
 image.save("dog-bucket.png")
 ```

-</hfoption>
-<hfoption id="Flax">
-
-```py
-import jax
-import numpy as np
-from flax.jax_utils import replicate
-from flax.training.common_utils import shard
-from diffusers import FlaxStableDiffusionPipeline
-
-pipeline, params = FlaxStableDiffusionPipeline.from_pretrained("path-to-your-trained-model", dtype=jax.numpy.bfloat16)
-
-prompt = "A photo of sks dog in a bucket"
-prng_seed = jax.random.PRNGKey(0)
-num_inference_steps = 50
-
-num_samples = jax.device_count()
-prompt = num_samples * [prompt]
-prompt_ids = pipeline.prepare_inputs(prompt)
-
-# shard inputs and rng
-params = replicate(params)
-prng_seed = jax.random.split(prng_seed, jax.device_count())
-prompt_ids = shard(prompt_ids)
-
-images = pipeline(prompt_ids, params, prng_seed, num_inference_steps, jit=True).images
-images = pipeline.numpy_to_pil(np.asarray(images.reshape((num_samples,) + images.shape[-3:])))
-image.save("dog-bucket.png")
-```
-
-</hfoption>
-</hfoptions>
-
 ## LoRA

 LoRA is a training technique for significantly reducing the number of trainable parameters. As a result, training is faster and it is easier to store the resulting weights because they are a lot smaller (~100MBs). Use the [train_dreambooth_lora.py](https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth_lora.py) script to train with LoRA.
@@ -88,7 +88,7 @@ Most of the parameters are identical to the parameters in the [Text-to-image](te

 ### Min-SNR weighting

-The [Min-SNR](https://huggingface.co/papers/2303.09556) weighting strategy can help with training by rebalancing the loss to achieve faster convergence. The training script supports predicting `epsilon` (noise) or `v_prediction`, but Min-SNR is compatible with both prediction types. This weighting strategy is only supported by PyTorch and is unavailable in the Flax training script.
+The [Min-SNR](https://huggingface.co/papers/2303.09556) weighting strategy can help with training by rebalancing the loss to achieve faster convergence. The training script supports predicting `epsilon` (noise) or `v_prediction`, and Min-SNR is compatible with both prediction types. This weighting strategy is only supported by PyTorch.

 Add the `--snr_gamma` parameter and set it to the recommended value of 5.0:

@@ -38,25 +38,11 @@ pip install .

 Navigate to the example folder with the training script and install the required dependencies for the script you're using:

-<hfoptions id="installation">
-<hfoption id="PyTorch">
-
 ```bash
 cd examples/text_to_image
 pip install -r requirements.txt
 ```

-</hfoption>
-<hfoption id="Flax">
-
-```bash
-cd examples/text_to_image
-pip install -r requirements_flax.txt
-```
-
-</hfoption>
-</hfoptions>
-
 <Tip>

 🤗 Accelerate is a library for helping you train on multiple GPUs/TPUs or with mixed-precision. It'll automatically configure your training setup based on your hardware and environment. Take a look at the 🤗 Accelerate [Quick tour](https://huggingface.co/docs/accelerate/quicktour) to learn more.
@@ -23,18 +23,18 @@ Each training script is:

 Our current collection of training scripts include:

-| Training | SDXL-support | LoRA-support | Flax-support |
-|---|---|---|---|
-| [unconditional image generation](https://github.com/huggingface/diffusers/tree/main/examples/unconditional_image_generation) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) |  |  |  |
-| [text-to-image](https://github.com/huggingface/diffusers/tree/main/examples/text_to_image) | 👍 | 👍 | 👍 |
-| [textual inversion](https://github.com/huggingface/diffusers/tree/main/examples/textual_inversion) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_textual_inversion_training.ipynb) |  |  | 👍 |
-| [DreamBooth](https://github.com/huggingface/diffusers/tree/main/examples/dreambooth) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_dreambooth_training.ipynb) | 👍 | 👍 | 👍 |
-| [ControlNet](https://github.com/huggingface/diffusers/tree/main/examples/controlnet) | 👍 |  | 👍 |
-| [InstructPix2Pix](https://github.com/huggingface/diffusers/tree/main/examples/instruct_pix2pix) | 👍 |  |  |
-| [Custom Diffusion](https://github.com/huggingface/diffusers/tree/main/examples/custom_diffusion) |  |  |  |
-| [T2I-Adapters](https://github.com/huggingface/diffusers/tree/main/examples/t2i_adapter) | 👍 |  |  |
-| [Kandinsky 2.2](https://github.com/huggingface/diffusers/tree/main/examples/kandinsky2_2/text_to_image) |  | 👍 |  |
-| [Wuerstchen](https://github.com/huggingface/diffusers/tree/main/examples/wuerstchen/text_to_image) |  | 👍 |  |
+| Training | SDXL-support | LoRA-support |
+|---|---|---|
+| [unconditional image generation](https://github.com/huggingface/diffusers/tree/main/examples/unconditional_image_generation) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) |  |  |
+| [text-to-image](https://github.com/huggingface/diffusers/tree/main/examples/text_to_image) | 👍 | 👍 |
+| [textual inversion](https://github.com/huggingface/diffusers/tree/main/examples/textual_inversion) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_textual_inversion_training.ipynb) |  |  |
+| [DreamBooth](https://github.com/huggingface/diffusers/tree/main/examples/dreambooth) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_dreambooth_training.ipynb) | 👍 | 👍 |
+| [ControlNet](https://github.com/huggingface/diffusers/tree/main/examples/controlnet) | 👍 |  |
+| [InstructPix2Pix](https://github.com/huggingface/diffusers/tree/main/examples/instruct_pix2pix) | 👍 |  |
+| [Custom Diffusion](https://github.com/huggingface/diffusers/tree/main/examples/custom_diffusion) |  |  |
+| [T2I-Adapters](https://github.com/huggingface/diffusers/tree/main/examples/t2i_adapter) | 👍 |  |
+| [Kandinsky 2.2](https://github.com/huggingface/diffusers/tree/main/examples/kandinsky2_2/text_to_image) |  | 👍 |
+| [Wuerstchen](https://github.com/huggingface/diffusers/tree/main/examples/wuerstchen/text_to_image) |  | 👍 |

 These examples are **actively** maintained, so please feel free to open an issue if they aren't working as expected. If you feel like another training example should be included, you're more than welcome to start a [Feature Request](https://github.com/huggingface/diffusers/issues/new?assignees=&labels=&template=feature_request.md&title=) to discuss your feature idea with us and whether it meets our criteria of being self-contained, easy-to-tweak, beginner-friendly, and single-purpose.

@@ -48,7 +48,7 @@ cd diffusers
 pip install .
 ```

-Then navigate to the folder of the training script (for example, [DreamBooth](https://github.com/huggingface/diffusers/tree/main/examples/dreambooth)) and install the `requirements.txt` file. Some training scripts have a specific requirement file for SDXL, LoRA or Flax. If you're using one of these scripts, make sure you install its corresponding requirements file.
+Then navigate to the folder of the training script (for example, [DreamBooth](https://github.com/huggingface/diffusers/tree/main/examples/dreambooth)) and install the `requirements.txt` file. Some training scripts have a specific requirement file for SDXL or LoRA. If you're using one of these scripts, make sure you install its corresponding requirements file.

 ```bash
 cd examples/dreambooth
@@ -96,7 +96,7 @@ Most of the parameters are identical to the parameters in the [Text-to-image](te

 ### Min-SNR weighting

-The [Min-SNR](https://huggingface.co/papers/2303.09556) weighting strategy can help with training by rebalancing the loss to achieve faster convergence. The training script supports predicting either `epsilon` (noise) or `v_prediction`, but Min-SNR is compatible with both prediction types. This weighting strategy is only supported by PyTorch and is unavailable in the Flax training script.
+The [Min-SNR](https://huggingface.co/papers/2303.09556) weighting strategy can help with training by rebalancing the loss to achieve faster convergence. The training script supports predicting either `epsilon` (noise) or `v_prediction`, and Min-SNR is compatible with both prediction types. This weighting strategy is only supported by PyTorch.

 Add the `--snr_gamma` parameter and set it to the recommended value of 5.0:

@@ -20,7 +20,7 @@ The text-to-image script is experimental, and it's easy to overfit and run into

 Text-to-image models like Stable Diffusion are conditioned to generate images given a text prompt.

-Training a model can be taxing on your hardware, but if you enable `gradient_checkpointing` and `mixed_precision`, it is possible to train a model on a single 24GB GPU. If you're training with larger batch sizes or want to train faster, it's better to use GPUs with more than 30GB of memory. You can reduce your memory footprint by enabling memory-efficient attention with [xFormers](../optimization/xformers). JAX/Flax training is also supported for efficient training on TPUs and GPUs, but it doesn't support gradient checkpointing, gradient accumulation or xFormers. A GPU with at least 30GB of memory or a TPU v3 is recommended for training with Flax.
+Training a model can be taxing on your hardware, but if you enable `gradient_checkpointing` and `mixed_precision`, it is possible to train a model on a single 24GB GPU. If you're training with larger batch sizes or want to train faster, it's better to use GPUs with more than 30GB of memory. You can reduce your memory footprint by enabling memory-efficient attention with [xFormers](../optimization/xformers).

 This guide will explore the [train_text_to_image.py](https://github.com/huggingface/diffusers/blob/main/examples/text_to_image/train_text_to_image.py) training script to help you become familiar with it, and how you can adapt it for your own use-case.

@@ -34,20 +34,10 @@ pip install .

 Then navigate to the example folder containing the training script and install the required dependencies for the script you're using:

-<hfoptions id="installation">
-<hfoption id="PyTorch">
 ```bash
 cd examples/text_to_image
 pip install -r requirements.txt
 ```
-</hfoption>
-<hfoption id="Flax">
-```bash
-cd examples/text_to_image
-pip install -r requirements_flax.txt
-```
-</hfoption>
-</hfoptions>

 <Tip>

@@ -106,7 +96,7 @@ Some basic and important parameters include:

 ### Min-SNR weighting

-The [Min-SNR](https://huggingface.co/papers/2303.09556) weighting strategy can help with training by rebalancing the loss to achieve faster convergence. The training script supports predicting `epsilon` (noise) or `v_prediction`, but Min-SNR is compatible with both prediction types. This weighting strategy is only supported by PyTorch and is unavailable in the Flax training script.
+The [Min-SNR](https://huggingface.co/papers/2303.09556) weighting strategy can help with training by rebalancing the loss to achieve faster convergence. The training script supports predicting `epsilon` (noise) or `v_prediction`, and Min-SNR is compatible with both prediction types. This weighting strategy is only supported by PyTorch.

 Add the `--snr_gamma` parameter and set it to the recommended value of 5.0:

@@ -155,9 +145,6 @@ Lastly, the [training loop](https://github.com/huggingface/diffusers/blob/8959c5

 Once you've made all your changes or you're okay with the default configuration, you're ready to launch the training script! 🚀

-<hfoptions id="training-inference">
-<hfoption id="PyTorch">
-
 Let's train on the [Naruto BLIP captions](https://huggingface.co/datasets/lambdalabs/naruto-blip-captions) dataset to generate your own Naruto characters. Set the environment variables `MODEL_NAME` and `dataset_name` to the model and the dataset (either from the Hub or a local path). If you're training on more than one GPU, add the `--multi_gpu` parameter to the `accelerate launch` command.

 <Tip>
@@ -187,43 +174,8 @@ accelerate launch --mixed_precision="fp16"  train_text_to_image.py \
  --push_to_hub
 ```

-</hfoption>
-<hfoption id="Flax">
-
-Training with Flax can be faster on TPUs and GPUs thanks to [@duongna211](https://github.com/duongna21). Flax is more efficient on a TPU, but GPU performance is also great.
-
-Set the environment variables `MODEL_NAME` and `dataset_name` to the model and the dataset (either from the Hub or a local path).
-
-<Tip>
-
-To train on a local dataset, set the `TRAIN_DIR` and `OUTPUT_DIR` environment variables to the path of the dataset and where to save the model to.
-
-</Tip>
-
-```bash
-export MODEL_NAME="stable-diffusion-v1-5/stable-diffusion-v1-5"
-export dataset_name="lambdalabs/naruto-blip-captions"
-
-python train_text_to_image_flax.py \
-  --pretrained_model_name_or_path=$MODEL_NAME \
-  --dataset_name=$dataset_name \
-  --resolution=512 --center_crop --random_flip \
-  --train_batch_size=1 \
-  --max_train_steps=15000 \
-  --learning_rate=1e-05 \
-  --max_grad_norm=1 \
-  --output_dir="sd-naruto-model" \
-  --push_to_hub
-```
-
-</hfoption>
-</hfoptions>
-
 Once training is complete, you can use your newly trained model for inference:

-<hfoptions id="training-inference">
-<hfoption id="PyTorch">
-
 ```py
 from diffusers import StableDiffusionPipeline
 import torch
@@ -234,39 +186,6 @@ image = pipeline(prompt="yoda").images[0]
 image.save("yoda-naruto.png")
 ```

-</hfoption>
-<hfoption id="Flax">
-
-```py
-import jax
-import numpy as np
-from flax.jax_utils import replicate
-from flax.training.common_utils import shard
-from diffusers import FlaxStableDiffusionPipeline
-
-pipeline, params = FlaxStableDiffusionPipeline.from_pretrained("path/to/saved_model", dtype=jax.numpy.bfloat16)
-
-prompt = "yoda naruto"
-prng_seed = jax.random.PRNGKey(0)
-num_inference_steps = 50
-
-num_samples = jax.device_count()
-prompt = num_samples * [prompt]
-prompt_ids = pipeline.prepare_inputs(prompt)
-
-# shard inputs and rng
-params = replicate(params)
-prng_seed = jax.random.split(prng_seed, jax.device_count())
-prompt_ids = shard(prompt_ids)
-
-images = pipeline(prompt_ids, params, prng_seed, num_inference_steps, jit=True).images
-images = pipeline.numpy_to_pil(np.asarray(images.reshape((num_samples,) + images.shape[-3:])))
-image.save("yoda-naruto.png")
-```
-
-</hfoption>
-</hfoptions>
-
 ## Next steps

 Congratulations on training your own text-to-image model! To learn more about how to use your new model, the following guides may be helpful:
@@ -14,7 +14,7 @@ specific language governing permissions and limitations under the License.

 [Textual Inversion](https://hf.co/papers/2208.01618) is a training technique for personalizing image generation models with just a few example images of what you want it to learn. This technique works by learning and updating the text embeddings (the new embeddings are tied to a special word you must use in the prompt) to match the example images you provide.

-If you're training on a GPU with limited vRAM, you should try enabling the `gradient_checkpointing` and `mixed_precision` parameters in the training command. You can also reduce your memory footprint by using memory-efficient attention with [xFormers](../optimization/xformers). JAX/Flax training is also supported for efficient training on TPUs and GPUs, but it doesn't support gradient checkpointing or xFormers. With the same configuration and setup as PyTorch, the Flax training script should be at least ~70% faster!
+If you're training on a GPU with limited vRAM, you should try enabling the `gradient_checkpointing` and `mixed_precision` parameters in the training command. You can also reduce your memory footprint by using memory-efficient attention with [xFormers](../optimization/xformers).

 This guide will explore the [textual_inversion.py](https://github.com/huggingface/diffusers/blob/main/examples/textual_inversion/textual_inversion.py) script to help you become more familiar with it, and how you can adapt it for your own use-case.

@@ -28,25 +28,10 @@ pip install .

 Navigate to the example folder with the training script and install the required dependencies for the script you're using:

-<hfoptions id="installation">
-<hfoption id="PyTorch">
-
 ```bash
 cd examples/textual_inversion
 pip install -r requirements.txt
 ```
-
-</hfoption>
-<hfoption id="Flax">
-
-```bash
-cd examples/textual_inversion
-pip install -r requirements_flax.txt
-```
-
-</hfoption>
-</hfoptions>
-
 <Tip>

 🤗 Accelerate is a library for helping you train on multiple GPUs/TPUs or with mixed-precision. It'll automatically configure your training setup based on your hardware and environment. Take a look at the 🤗 Accelerate [Quick tour](https://huggingface.co/docs/accelerate/quicktour) to learn more.
@@ -189,9 +174,6 @@ One more thing before you launch the script. If you're interested in following a
 --validation_steps=100
 ```

-<hfoptions id="training-inference">
-<hfoption id="PyTorch">
-
 ```bash
 export MODEL_NAME="stable-diffusion-v1-5/stable-diffusion-v1-5"
 export DATA_DIR="./cat"
@@ -214,36 +196,8 @@ accelerate launch textual_inversion.py \
  --push_to_hub
 ```

-</hfoption>
-<hfoption id="Flax">
-
-```bash
-export MODEL_NAME="duongna/stable-diffusion-v1-4-flax"
-export DATA_DIR="./cat"
-
-python textual_inversion_flax.py \
-  --pretrained_model_name_or_path=$MODEL_NAME \
-  --train_data_dir=$DATA_DIR \
-  --learnable_property="object" \
-  --placeholder_token="<cat-toy>" \
-  --initializer_token="toy" \
-  --resolution=512 \
-  --train_batch_size=1 \
-  --max_train_steps=3000 \
-  --learning_rate=5.0e-04 \
-  --scale_lr \
-  --output_dir="textual_inversion_cat" \
-  --push_to_hub
-```
-
-</hfoption>
-</hfoptions>
-
 After training is complete, you can use your newly trained model for inference like:

-<hfoptions id="training-inference">
-<hfoption id="PyTorch">
-
 ```py
 from diffusers import StableDiffusionPipeline
 import torch
@@ -254,42 +208,6 @@ image = pipeline("A <cat-toy> train", num_inference_steps=50).images[0]
 image.save("cat-train.png")
 ```

-</hfoption>
-<hfoption id="Flax">
-
-Flax doesn't support the [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] method, but the textual_inversion_flax.py script [saves](https://github.com/huggingface/diffusers/blob/c0f058265161178f2a88849e92b37ffdc81f1dcc/examples/textual_inversion/textual_inversion_flax.py#L636C2-L636C2) the learned embeddings as a part of the model after training. This means you can use the model for inference like any other Flax model:
-
-```py
-import jax
-import numpy as np
-from flax.jax_utils import replicate
-from flax.training.common_utils import shard
-from diffusers import FlaxStableDiffusionPipeline
-
-model_path = "path-to-your-trained-model"
-pipeline, params = FlaxStableDiffusionPipeline.from_pretrained(model_path, dtype=jax.numpy.bfloat16)
-
-prompt = "A <cat-toy> train"
-prng_seed = jax.random.PRNGKey(0)
-num_inference_steps = 50
-
-num_samples = jax.device_count()
-prompt = num_samples * [prompt]
-prompt_ids = pipeline.prepare_inputs(prompt)
-
-# shard inputs and rng
-params = replicate(params)
-prng_seed = jax.random.split(prng_seed, jax.device_count())
-prompt_ids = shard(prompt_ids)
-
-images = pipeline(prompt_ids, params, prng_seed, num_inference_steps, jit=True).images
-images = pipeline.numpy_to_pil(np.asarray(images.reshape((num_samples,) + images.shape[-3:])))
-image.save("cat-train.png")
-```
-
-</hfoption>
-</hfoptions>
-
 ## Next steps

 Congratulations on training your own Textual Inversion model! 🎉 To learn more about how to use your new model, the following guides may be helpful:
@@ -12,112 +12,56 @@ specific language governing permissions and limitations under the License.

 # AutoPipeline

-Diffusers provides many pipelines for basic tasks like generating images, videos, audio, and inpainting. On top of these, there are specialized pipelines for adapters and features like upscaling, super-resolution, and more. Different pipeline classes can even use the same checkpoint because they share the same pretrained model! With so many different pipelines, it can be overwhelming to know which pipeline class to use.
+[AutoPipeline](../api/pipelines/auto_pipeline) is a *task-and-model* pipeline that automatically selects the correct pipeline subclass based on the task. It handles the complexity of loading different pipeline subclasses without needing to know the specific pipeline subclass name.

-The [AutoPipeline](../api/pipelines/auto_pipeline) class is designed to simplify the variety of pipelines in Diffusers. It is a generic *task-first* pipeline that lets you focus on a task ([`AutoPipelineForText2Image`], [`AutoPipelineForImage2Image`], and [`AutoPipelineForInpainting`]) without needing to know the specific pipeline class. The [AutoPipeline](../api/pipelines/auto_pipeline) automatically detects the correct pipeline class to use.
+This is unlike [`DiffusionPipeline`], a *model-only* pipeline that automatically selects the pipeline subclass based on the model.

-For example, let's use the [dreamlike-art/dreamlike-photoreal-2.0](https://hf.co/dreamlike-art/dreamlike-photoreal-2.0) checkpoint.
-
-Under the hood, [AutoPipeline](../api/pipelines/auto_pipeline):
-
-1. Detects a `"stable-diffusion"` class from the [model_index.json](https://hf.co/dreamlike-art/dreamlike-photoreal-2.0/blob/main/model_index.json) file.
-2. Depending on the task you're interested in, it loads the [`StableDiffusionPipeline`], [`StableDiffusionImg2ImgPipeline`], or [`StableDiffusionInpaintPipeline`]. Any parameter (`strength`, `num_inference_steps`, etc.) you would pass to these specific pipelines can also be passed to the [AutoPipeline](../api/pipelines/auto_pipeline).
-
-<hfoptions id="autopipeline">
-<hfoption id="text-to-image">
+[`AutoPipelineForImage2Image`] returns a specific pipeline subclass (for example, [`StableDiffusionXLImg2ImgPipeline`]), which can only be used for image-to-image tasks.

 ```py
-from diffusers import AutoPipelineForText2Image
 import torch
-
-pipe_txt2img = AutoPipelineForText2Image.from_pretrained(
-    "dreamlike-art/dreamlike-photoreal-2.0", torch_dtype=torch.float16, use_safetensors=True
-).to("cuda")
-
-prompt = "cinematic photo of Godzilla eating sushi with a cat in a izakaya, 35mm photograph, film, professional, 4k, highly detailed"
-generator = torch.Generator(device="cpu").manual_seed(37)
-image = pipe_txt2img(prompt, generator=generator).images[0]
-image
-```
-
-<div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/autopipeline-text2img.png"/>
-</div>
-
-</hfoption>
-<hfoption id="image-to-image">
-
-```py
 from diffusers import AutoPipelineForImage2Image
-from diffusers.utils import load_image
-import torch
-
-pipe_img2img = AutoPipelineForImage2Image.from_pretrained(
-    "dreamlike-art/dreamlike-photoreal-2.0", torch_dtype=torch.float16, use_safetensors=True
-).to("cuda")
-
-init_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/autopipeline-text2img.png")
-
-prompt = "cinematic photo of Godzilla eating burgers with a cat in a fast food restaurant, 35mm photograph, film, professional, 4k, highly detailed"
-generator = torch.Generator(device="cpu").manual_seed(53)
-image = pipe_img2img(prompt, image=init_image, generator=generator).images[0]
-image
-```
-
-Notice how the [dreamlike-art/dreamlike-photoreal-2.0](https://hf.co/dreamlike-art/dreamlike-photoreal-2.0) checkpoint is used for both text-to-image and image-to-image tasks? To save memory and avoid loading the checkpoint twice, use the [`~DiffusionPipeline.from_pipe`] method.
-
-```py
-pipe_img2img = AutoPipelineForImage2Image.from_pipe(pipe_txt2img).to("cuda")
-image = pipeline(prompt, image=init_image, generator=generator).images[0]
-image
-```
-
-You can learn more about the [`~DiffusionPipeline.from_pipe`] method in the [Reuse a pipeline](../using-diffusers/loading#reuse-a-pipeline) guide.
-
-<div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/autopipeline-img2img.png"/>
-</div>
-
-</hfoption>
-<hfoption id="inpainting">
-
-```py
-from diffusers import AutoPipelineForInpainting
-from diffusers.utils import load_image
-import torch
-
-pipeline = AutoPipelineForInpainting.from_pretrained(
-    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, use_safetensors=True
-).to("cuda")
-
-init_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/autopipeline-img2img.png")
-mask_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/autopipeline-mask.png")
-
-prompt = "cinematic photo of a owl, 35mm photograph, film, professional, 4k, highly detailed"
-generator = torch.Generator(device="cpu").manual_seed(38)
-image = pipeline(prompt, image=init_image, mask_image=mask_image, generator=generator, strength=0.4).images[0]
-image
-```
-
-<div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/autopipeline-inpaint.png"/>
-</div>
-
-</hfoption>
-</hfoptions>
-
-## Unsupported checkpoints
-
-The [AutoPipeline](../api/pipelines/auto_pipeline) supports [Stable Diffusion](../api/pipelines/stable_diffusion/overview), [Stable Diffusion XL](../api/pipelines/stable_diffusion/stable_diffusion_xl), [ControlNet](../api/pipelines/controlnet), [Kandinsky 2.1](../api/pipelines/kandinsky.md), [Kandinsky 2.2](../api/pipelines/kandinsky_v22), and [DeepFloyd IF](../api/pipelines/deepfloyd_if) checkpoints.
-
-If you try to load an unsupported checkpoint, you'll get an error.
-
-```py
-from diffusers import AutoPipelineForImage2Image
-import torch

 pipeline = AutoPipelineForImage2Image.from_pretrained(
-    "openai/shap-e-img2img", torch_dtype=torch.float16, use_safetensors=True
+  "RunDiffusion/Juggernaut-XL-v9", torch_dtype=torch.bfloat16, device_map="cuda",
+)
+print(pipeline)
+"StableDiffusionXLImg2ImgPipeline {
+  "_class_name": "StableDiffusionXLImg2ImgPipeline",
+  ...
+"
+```
+
+Loading the same model with [`DiffusionPipeline`] returns the [`StableDiffusionXLPipeline`] subclass. It can be used for text-to-image, image-to-image, or inpainting tasks depending on the inputs.
+
+```py
+import torch
+from diffusers import DiffusionPipeline
+
+pipeline = DiffusionPipeline.from_pretrained(
+  "RunDiffusion/Juggernaut-XL-v9", torch_dtype=torch.bfloat16, device_map="cuda",
+)
+print(pipeline)
+"StableDiffusionXLPipeline {
+  "_class_name": "StableDiffusionXLPipeline",
+  ...
+"
+```
+
+Check the [mappings](https://github.com/huggingface/diffusers/blob/130fd8df54f24ffb006d84787b598d8adc899f23/src/diffusers/pipelines/auto_pipeline.py#L114) to see whether a model is supported or not.
+
+Trying to load an unsupported model returns an error.
+
+```py
+import torch
+from diffusers import AutoPipelineForImage2Image
+
+pipeline = AutoPipelineForImage2Image.from_pretrained(
+    "openai/shap-e-img2img", torch_dtype=torch.float16,
 )
 "ValueError: AutoPipeline can't find a pipeline linked to ShapEImg2ImgPipeline for None"
 ```
+
+There are three types of [AutoPipeline](../api/pipelines/auto_pipeline) classes: [`AutoPipelineForText2Image`], [`AutoPipelineForImage2Image`], and [`AutoPipelineForInpainting`]. Each of these classes has a predefined mapping, linking a pipeline to its task-specific subclass.
+
+When [`~AutoPipelineForText2Image.from_pretrained`] is called, it extracts the class name from the `model_index.json` file and selects the appropriate pipeline subclass for the task based on the mapping.
@@ -94,7 +94,7 @@ pipeline = AutoPipelineForText2Image.from_pretrained(
 pipeline.unet.load_lora_adapter(
    "jbilcke-hf/sdxl-cinematic-1",
    weight_name="pytorch_lora_weights.safetensors",
-    adapter_name="cinematic"
+    adapter_name="cinematic",
    prefix="unet"
 )
 # use cnmt in the prompt to trigger the LoRA
@@ -688,4 +688,4 @@ Browse the [LoRA Studio](https://lorastudio.co/models) for different LoRAs to us

 You can find additional LoRAs in the [FLUX LoRA the Explorer](https://huggingface.co/spaces/multimodalart/flux-lora-the-explorer) and [LoRA the Explorer](https://huggingface.co/spaces/multimodalart/LoraTheExplorer) Spaces.

-Check out the [Fast LoRA inference for Flux with Diffusers and PEFT](https://huggingface.co/blog/lora-fast) blog post to learn how to optimize LoRA inference with methods like FlashAttention-3 and fp8 quantization.
+Check out the [Fast LoRA inference for Flux with Diffusers and PEFT](https://huggingface.co/blog/lora-fast) blog post to learn how to optimize LoRA inference with methods like FlashAttention-3 and fp8 quantization.
@@ -12,52 +12,37 @@ specific language governing permissions and limitations under the License.

 # Pipeline callbacks

-The denoising loop of a pipeline can be modified with custom defined functions using the `callback_on_step_end` parameter. The callback function is executed at the end of each step, and modifies the pipeline attributes and variables for the next step. This is really useful for *dynamically* adjusting certain pipeline attributes or modifying tensor variables. This versatility allows for interesting use cases such as changing the prompt embeddings at each timestep, assigning different weights to the prompt embeddings, and editing the guidance scale. With callbacks, you can implement new features without modifying the underlying code!
+A callback is a function that modifies [`DiffusionPipeline`] behavior and it is executed at the end of a denoising step. The changes are propagated to subsequent steps in the denoising process. It is useful for adjusting pipeline attributes or tensor variables to support new features without rewriting the underlying pipeline code.

-> [!TIP]
-> 🤗 Diffusers currently only supports `callback_on_step_end`, but feel free to open a [feature request](https://github.com/huggingface/diffusers/issues/new/choose) if you have a cool use-case and require a callback function with a different execution point!
+Diffusers provides several callbacks in the pipeline [overview](../api/pipelines/overview#callbacks).

-This guide will demonstrate how callbacks work by a few features you can implement with them.
+To enable a callback, specify the number of denoising steps after which it takes effect with one of the following arguments.

-## Official callbacks
+- `cutoff_step_ratio` specifies when a callback is activated as a percentage of the total denoising steps.
+- `cutoff_step_index` specifies the exact step number at which a callback is activated.

-We provide a list of callbacks you can plug into an existing pipeline and modify the denoising loop. This is the current list of official callbacks:
+The example below uses `cutoff_step_ratio=0.4`, which means the callback is activated once denoising reaches 40% of the total inference steps. [`~callbacks.SDXLCFGCutoffCallback`] disables classifier-free guidance (CFG) after a certain number of steps, which can help save compute without significantly affecting performance.

- `SDCFGCutoffCallback`: Disables the CFG after a certain number of steps for all SD 1.5 pipelines, including text-to-image, image-to-image, inpaint, and controlnet.
- `SDXLCFGCutoffCallback`: Disables the CFG after a certain number of steps for all SDXL pipelines, including text-to-image, image-to-image, inpaint, and controlnet.
- `IPAdapterScaleCutoffCallback`: Disables the IP Adapter after a certain number of steps for all pipelines supporting IP-Adapter.
+Define a callback with either of the `cutoff` arguments and pass it to the `callback_on_step_end` parameter in the pipeline.

-> [!TIP]
-> If you want to add a new official callback, feel free to open a [feature request](https://github.com/huggingface/diffusers/issues/new/choose) or [submit a PR](https://huggingface.co/docs/diffusers/main/en/conceptual/contribution#how-to-open-a-pr).
-
-To set up a callback, you need to specify the number of denoising steps after which the callback comes into effect. You can do so by using either one of these two arguments
-
- `cutoff_step_ratio`: Float number with the ratio of the steps.
- `cutoff_step_index`: Integer number with the exact number of the step.
-
-```python
+```py
 import torch
-
 from diffusers import DPMSolverMultistepScheduler, StableDiffusionXLPipeline
 from diffusers.callbacks import SDXLCFGCutoffCallback

-
 callback = SDXLCFGCutoffCallback(cutoff_step_ratio=0.4)
-# can also be used with cutoff_step_index
+# if using cutoff_step_index
 # callback = SDXLCFGCutoffCallback(cutoff_step_ratio=None, cutoff_step_index=10)

 pipeline = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
-    variant="fp16",
-).to("cuda")
+    device_map="cuda"
+)
 pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config, use_karras_sigmas=True)

 prompt = "a sports car at the road, best quality, high quality, high detail, 8k resolution"
-
-generator = torch.Generator(device="cpu").manual_seed(2628670641)
-
-out = pipeline(
+output = pipeline(
    prompt=prompt,
    negative_prompt="",
    guidance_scale=6.5,
@@ -65,83 +50,16 @@ out = pipeline(
    generator=generator,
    callback_on_step_end=callback,
 )
-
-out.images[0].save("official_callback.png")
 ```

-<div class="flex gap-4">
-  <div>
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/without_cfg_callback.png" alt="generated image of a sports car at the road" />
-    <figcaption class="mt-2 text-center text-sm text-gray-500">without SDXLCFGCutoffCallback</figcaption>
-  </div>
-  <div>
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/with_cfg_callback.png" alt="generated image of a sports car at the road with cfg callback" />
-    <figcaption class="mt-2 text-center text-sm text-gray-500">with SDXLCFGCutoffCallback</figcaption>
-  </div>
-</div>
+If you want to add a new official callback, feel free to open a [feature request](https://github.com/huggingface/diffusers/issues/new/choose) or [submit a PR](https://huggingface.co/docs/diffusers/main/en/conceptual/contribution#how-to-open-a-pr). Otherwise, you can also create your own callback as shown below.

-## Dynamic classifier-free guidance
+## Early stopping

-Dynamic classifier-free guidance (CFG) is a feature that allows you to disable CFG after a certain number of inference steps which can help you save compute with minimal cost to performance. The callback function for this should have the following arguments:
-
- `pipeline` (or the pipeline instance) provides access to important properties such as `num_timesteps` and `guidance_scale`. You can modify these properties by updating the underlying attributes. For this example, you'll disable CFG by setting `pipeline._guidance_scale=0.0`.
- `step_index` and `timestep` tell you where you are in the denoising loop. Use `step_index` to turn off CFG after reaching 40% of `num_timesteps`.
- `callback_kwargs` is a dict that contains tensor variables you can modify during the denoising loop. It only includes variables specified in the `callback_on_step_end_tensor_inputs` argument, which is passed to the pipeline's `__call__` method. Different pipelines may use different sets of variables, so please check a pipeline's `_callback_tensor_inputs` attribute for the list of variables you can modify. Some common variables include `latents` and `prompt_embeds`. For this function, change the batch size of `prompt_embeds` after setting `guidance_scale=0.0` in order for it to work properly.
-
-Your callback function should look something like this:
-
-```python
-def callback_dynamic_cfg(pipe, step_index, timestep, callback_kwargs):
-        # adjust the batch_size of prompt_embeds according to guidance_scale
-        if step_index == int(pipeline.num_timesteps * 0.4):
-                prompt_embeds = callback_kwargs["prompt_embeds"]
-                prompt_embeds = prompt_embeds.chunk(2)[-1]
-
-                # update guidance_scale and prompt_embeds
-                pipeline._guidance_scale = 0.0
-                callback_kwargs["prompt_embeds"] = prompt_embeds
-        return callback_kwargs
-```
-
-Now, you can pass the callback function to the `callback_on_step_end` parameter and the `prompt_embeds` to `callback_on_step_end_tensor_inputs`.
+Early stopping is useful if you aren't happy with the intermediate results during generation. This callback sets a hardcoded stop point after which the pipeline terminates by setting the `_interrupt` attribute to `True`.

 ```py
-import torch
-from diffusers import StableDiffusionPipeline
-
-pipeline = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16)
-pipeline = pipeline.to("cuda")
-
-prompt = "a photo of an astronaut riding a horse on mars"
-
-generator = torch.Generator(device="cuda").manual_seed(1)
-out = pipeline(
-    prompt,
-    generator=generator,
-    callback_on_step_end=callback_dynamic_cfg,
-    callback_on_step_end_tensor_inputs=['prompt_embeds']
-)
-
-out.images[0].save("out_custom_cfg.png")
-```
-
-## Interrupt the diffusion process
-
-> [!TIP]
-> The interruption callback is supported for text-to-image, image-to-image, and inpainting for the [StableDiffusionPipeline](../api/pipelines/stable_diffusion/overview) and [StableDiffusionXLPipeline](../api/pipelines/stable_diffusion/stable_diffusion_xl).
-
-Stopping the diffusion process early is useful when building UIs that work with Diffusers because it allows users to stop the generation process if they're unhappy with the intermediate results. You can incorporate this into your pipeline with a callback.
-
-This callback function should take the following arguments: `pipeline`, `i`, `t`, and `callback_kwargs` (this must be returned). Set the pipeline's `_interrupt` attribute to `True` to stop the diffusion process after a certain number of steps. You are also free to implement your own custom stopping logic inside the callback.
-
-In this example, the diffusion process is stopped after 10 steps even though `num_inference_steps` is set to 50.
-
-```python
-from diffusers import StableDiffusionPipeline
-
-pipeline = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
-pipeline.enable_model_cpu_offload()
-num_inference_steps = 50
+from diffusers import StableDiffusionXLPipeline

 def interrupt_callback(pipeline, i, t, callback_kwargs):
    stop_idx = 10
@@ -150,6 +68,11 @@ def interrupt_callback(pipeline, i, t, callback_kwargs):

    return callback_kwargs

+pipeline = StableDiffusionXLPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0"
+)
+num_inference_steps = 50
+
 pipeline(
    "A photo of a cat",
    num_inference_steps=num_inference_steps,
@@ -157,92 +80,11 @@ pipeline(
 )
 ```

-## IP Adapter Cutoff
+## Display intermediate images

-IP Adapter is an image prompt adapter that can be used for diffusion models without any changes to the underlying model. We can use the IP Adapter Cutoff Callback to disable the IP Adapter after a certain number of steps. To set up the callback, you need to specify the number of denoising steps after which the callback comes into effect. You can do so by using either one of these two arguments:
+Visualizing the intermediate images is useful for progress monitoring and assessing the quality of the generated content. This callback decodes the latent tensors at each step and converts them to images.

- `cutoff_step_ratio`: Float number with the ratio of the steps.
- `cutoff_step_index`: Integer number with the exact number of the step.
-
-We need to download the diffusion model and load the ip_adapter for it as follows:
-
-```py
-from diffusers import AutoPipelineForText2Image
-from diffusers.utils import load_image
-import torch
-
-pipeline = AutoPipelineForText2Image.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16).to("cuda")
-pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
-pipeline.set_ip_adapter_scale(0.6)
-```
-The setup for the callback should look something like this:
-
-```py
-
-from diffusers import AutoPipelineForText2Image
-from diffusers.callbacks import IPAdapterScaleCutoffCallback
-from diffusers.utils import load_image
-import torch
- 
-
-pipeline = AutoPipelineForText2Image.from_pretrained(
-    "stabilityai/stable-diffusion-xl-base-1.0", 
-    torch_dtype=torch.float16
-).to("cuda")
-
-
-pipeline.load_ip_adapter(
-    "h94/IP-Adapter", 
-    subfolder="sdxl_models", 
-    weight_name="ip-adapter_sdxl.bin"
-)
-
-pipeline.set_ip_adapter_scale(0.6)
-
-
-callback = IPAdapterScaleCutoffCallback(
-    cutoff_step_ratio=None, 
-    cutoff_step_index=5
-)
-
-image = load_image(
-    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/ip_adapter_diner.png"
-)
-
-generator = torch.Generator(device="cuda").manual_seed(2628670641)
-
-images = pipeline(
-    prompt="a tiger sitting in a chair drinking orange juice",
-    ip_adapter_image=image,
-    negative_prompt="deformed, ugly, wrong proportion, low res, bad anatomy, worst quality, low quality",
-    generator=generator,
-    num_inference_steps=50,
-    callback_on_step_end=callback,
-).images
-
-images[0].save("custom_callback_img.png")
-```
-
-<div class="flex gap-4">
-  <div>
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/without_callback.png" alt="generated image of a tiger sitting in a chair drinking orange juice" />
-    <figcaption class="mt-2 text-center text-sm text-gray-500">without IPAdapterScaleCutoffCallback</figcaption>
-  </div>
-  <div>
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/with_callback2.png" alt="generated image of a tiger sitting in a chair drinking orange juice with ip adapter callback" />
-    <figcaption class="mt-2 text-center text-sm text-gray-500">with IPAdapterScaleCutoffCallback</figcaption>
-  </div>
-</div>
-
-
-## Display image after each generation step
-
-> [!TIP]
-> This tip was contributed by [asomoza](https://github.com/asomoza).
-
-Display an image after each generation step by accessing and converting the latents after each step into an image. The latent space is compressed to 128x128, so the images are also 128x128 which is useful for a quick preview.
-
-1. Use the function below to convert the SDXL latents (4 channels) to RGB tensors (3 channels) as explained in the [Explaining the SDXL latent space](https://huggingface.co/blog/TimothyAlexisVass/explaining-the-sdxl-latent-space) blog post.
+[Convert](https://huggingface.co/blog/TimothyAlexisVass/explaining-the-sdxl-latent-space) the Stable Diffusion XL latents (4 channels) to RGB tensors (3 channels).

 ```py
 def latents_to_rgb(latents):
@@ -260,7 +102,7 @@ def latents_to_rgb(latents):
    return Image.fromarray(image_array)
 ```

-2. Create a function to decode and save the latents into an image.
+Extract the latents and convert the first image in the batch to RGB. Save the image as a PNG file with the step number.

 ```py
 def decode_tensors(pipe, step, timestep, callback_kwargs):
@@ -272,19 +114,18 @@ def decode_tensors(pipe, step, timestep, callback_kwargs):
    return callback_kwargs
 ```

-3. Pass the `decode_tensors` function to the `callback_on_step_end` parameter to decode the tensors after each step. You also need to specify what you want to modify in the `callback_on_step_end_tensor_inputs` parameter, which in this case are the latents.
+Pass the `decode_tensors` function to the `callback_on_step_end` parameter, and use the `callback_on_step_end_tensor_inputs` parameter to specify which tensors the callback receives, which in this case are the latents.

 ```py
-from diffusers import AutoPipelineForText2Image
 import torch
 from PIL import Image
+from diffusers import AutoPipelineForText2Image

 pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
-    variant="fp16",
-    use_safetensors=True
-).to("cuda")
+    device_map="cuda"
+)

 image = pipeline(
    prompt="A croissant shaped like a cute bear.",
@@ -293,27 +134,3 @@ image = pipeline(
    callback_on_step_end_tensor_inputs=["latents"],
 ).images[0]
 ```
-
-<div class="flex gap-4 justify-center">
-  <div>
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/tips_step_0.png"/>
-    <figcaption class="mt-2 text-center text-sm text-gray-500">step 0</figcaption>
-  </div>
-  <div>
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/tips_step_19.png"/>
-    <figcaption class="mt-2 text-center text-sm text-gray-500">step 19
-    </figcaption>
-  </div>
-  <div>
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/tips_step_29.png"/>
-    <figcaption class="mt-2 text-center text-sm text-gray-500">step 29</figcaption>
-  </div>
-  <div>
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/tips_step_39.png"/>
-    <figcaption class="mt-2 text-center text-sm text-gray-500">step 39</figcaption>
-  </div>
-  <div>
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/tips_step_49.png"/>
-    <figcaption class="mt-2 text-center text-sm text-gray-500">step 49</figcaption>
-  </div>
-</div>
@@ -10,376 +10,163 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
 specific language governing permissions and limitations under the License.
 -->

-# Load community pipelines and components
-
 [[open-in-colab]]

-## Community pipelines
+# Community pipelines and components

-> [!TIP] Take a look at GitHub Issue [#841](https://github.com/huggingface/diffusers/issues/841) for more context about why we're adding community pipelines to help everyone easily share their work without being slowed down.
-
-Community pipelines are any [`DiffusionPipeline`] class that are different from the original paper implementation (for example, the [`StableDiffusionControlNetPipeline`] corresponds to the [Text-to-Image Generation with ControlNet Conditioning](https://huggingface.co/papers/2302.05543) paper). They provide additional functionality or extend the original implementation of a pipeline.
-
-There are many cool community pipelines like [Marigold Depth Estimation](https://github.com/huggingface/diffusers/tree/main/examples/community#marigold-depth-estimation) or [InstantID](https://github.com/huggingface/diffusers/tree/main/examples/community#instantid-pipeline), and you can find all the official community pipelines [here](https://github.com/huggingface/diffusers/tree/main/examples/community).
-
-There are two types of community pipelines, those stored on the Hugging Face Hub and those stored on Diffusers GitHub repository. Hub pipelines are completely customizable (scheduler, models, pipeline code, etc.) while Diffusers GitHub pipelines are only limited to custom pipeline code.
-
-|                | GitHub community pipeline                                                                                        | HF Hub community pipeline                                                                 |
-|----------------|------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------|
-| usage          | same                                                                                                             | same                                                                                      |
-| review process | open a Pull Request on GitHub and undergo a review process from the Diffusers team before merging; may be slower | upload directly to a Hub repository without any review; this is the fastest workflow      |
-| visibility     | included in the official Diffusers repository and documentation                                                  | included on your HF Hub profile and relies on your own usage/promotion to gain visibility |
-
-<hfoptions id="community">
-<hfoption id="Hub pipelines">
-
-To load a Hugging Face Hub community pipeline, pass the repository id of the community pipeline to the `custom_pipeline` argument and the model repository where you'd like to load the pipeline weights and components from. For example, the example below loads a dummy pipeline from [hf-internal-testing/diffusers-dummy-pipeline](https://huggingface.co/hf-internal-testing/diffusers-dummy-pipeline/blob/main/pipeline.py) and the pipeline weights and components from [google/ddpm-cifar10-32](https://huggingface.co/google/ddpm-cifar10-32):
-
-> [!WARNING]
-> By loading a community pipeline from the Hugging Face Hub, you are trusting that the code you are loading is safe. Make sure to inspect the code online before loading and running it automatically!
-
-```py
-from diffusers import DiffusionPipeline
-
-pipeline = DiffusionPipeline.from_pretrained(
-    "google/ddpm-cifar10-32", custom_pipeline="hf-internal-testing/diffusers-dummy-pipeline", use_safetensors=True
-)
-```
-
-</hfoption>
-<hfoption id="GitHub pipelines">
-
-To load a GitHub community pipeline, pass the repository id of the community pipeline to the `custom_pipeline` argument and the model repository where you you'd like to load the pipeline weights and components from. You can also load model components directly. The example below loads the community [CLIP Guided Stable Diffusion](https://github.com/huggingface/diffusers/tree/main/examples/community#clip-guided-stable-diffusion) pipeline and the CLIP model components.
-
-```py
-from diffusers import DiffusionPipeline
-from transformers import CLIPImageProcessor, CLIPModel
-
-clip_model_id = "laion/CLIP-ViT-B-32-laion2B-s34B-b79K"
-
-feature_extractor = CLIPImageProcessor.from_pretrained(clip_model_id)
-clip_model = CLIPModel.from_pretrained(clip_model_id)
-
-pipeline = DiffusionPipeline.from_pretrained(
-    "stable-diffusion-v1-5/stable-diffusion-v1-5",
-    custom_pipeline="clip_guided_stable_diffusion",
-    clip_model=clip_model,
-    feature_extractor=feature_extractor,
-    use_safetensors=True,
-)
-```
-
-</hfoption>
-</hfoptions>
-
-### Load from a local file
-
-Community pipelines can also be loaded from a local file if you pass a file path instead. The path to the passed directory must contain a pipeline.py file that contains the pipeline class.
-
-```py
-pipeline = DiffusionPipeline.from_pretrained(
-    "stable-diffusion-v1-5/stable-diffusion-v1-5",
-    custom_pipeline="./path/to/pipeline_directory/",
-    clip_model=clip_model,
-    feature_extractor=feature_extractor,
-    use_safetensors=True,
-)
-```
-
-### Load from a specific version
-
-By default, community pipelines are loaded from the latest stable version of Diffusers. To load a community pipeline from another version, use the `custom_revision` parameter.
-
-<hfoptions id="version">
-<hfoption id="main">
-
-For example, to load from the main branch:
-
-```py
-pipeline = DiffusionPipeline.from_pretrained(
-    "stable-diffusion-v1-5/stable-diffusion-v1-5",
-    custom_pipeline="clip_guided_stable_diffusion",
-    custom_revision="main",
-    clip_model=clip_model,
-    feature_extractor=feature_extractor,
-    use_safetensors=True,
-)
-```
-
-</hfoption>
-<hfoption id="older version">
-
-For example, to load from a previous version of Diffusers like v0.25.0:
-
-```py
-pipeline = DiffusionPipeline.from_pretrained(
-    "stable-diffusion-v1-5/stable-diffusion-v1-5",
-    custom_pipeline="clip_guided_stable_diffusion",
-    custom_revision="v0.25.0",
-    clip_model=clip_model,
-    feature_extractor=feature_extractor,
-    use_safetensors=True,
-)
-```
-
-</hfoption>
-</hfoptions>
-
-### Load with from_pipe
-
-Community pipelines can also be loaded with the [`~DiffusionPipeline.from_pipe`] method which allows you to load and reuse multiple pipelines without any additional memory overhead (learn more in the [Reuse a pipeline](./loading#reuse-a-pipeline) guide). The memory requirement is determined by the largest single pipeline loaded.
-
-For example, let's load a community pipeline that supports [long prompts with weighting](https://github.com/huggingface/diffusers/tree/main/examples/community#long-prompt-weighting-stable-diffusion) from a Stable Diffusion pipeline.
-
-```py
-import torch
-from diffusers import DiffusionPipeline
-
-pipe_sd = DiffusionPipeline.from_pretrained("emilianJR/CyberRealistic_V3", torch_dtype=torch.float16)
-pipe_sd.to("cuda")
-# load long prompt weighting pipeline
-pipe_lpw = DiffusionPipeline.from_pipe(
-    pipe_sd,
-    custom_pipeline="lpw_stable_diffusion",
-).to("cuda")
-
-prompt = "cat, hiding in the leaves, ((rain)), zazie rainyday, beautiful eyes, macro shot, colorful details, natural lighting, amazing composition, subsurface scattering, amazing textures, filmic, soft light, ultra-detailed eyes, intricate details, detailed texture, light source contrast, dramatic shadows, cinematic light, depth of field, film grain, noise, dark background, hyperrealistic dslr film still, dim volumetric cinematic lighting"
-neg_prompt = "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, mutated hands and fingers:1.4), (deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation"
-generator = torch.Generator(device="cpu").manual_seed(20)
-out_lpw = pipe_lpw(
-    prompt,
-    negative_prompt=neg_prompt,
-    width=512,
-    height=512,
-    max_embeddings_multiples=3,
-    num_inference_steps=50,
-    generator=generator,
-    ).images[0]
-out_lpw
-```
-
-<div class="flex gap-4">
-  <div>
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/from_pipe_lpw.png" />
-    <figcaption class="mt-2 text-center text-sm text-gray-500">Stable Diffusion with long prompt weighting</figcaption>
-  </div>
-  <div>
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/from_pipe_non_lpw.png" />
-    <figcaption class="mt-2 text-center text-sm text-gray-500">Stable Diffusion</figcaption>
-  </div>
-</div>
-
-## Example community pipelines
-
-Community pipelines are a really fun and creative way to extend the capabilities of the original pipeline with new and unique features. You can find all community pipelines in the [diffusers/examples/community](https://github.com/huggingface/diffusers/tree/main/examples/community) folder with inference and training examples for how to use them.
-
-This section showcases a couple of the community pipelines and hopefully it'll inspire you to create your own (feel free to open a PR for your community pipeline and ping us for a review)!
+Community pipelines are [`DiffusionPipeline`] classes that are different from the original paper implementation. They provide additional functionality or extend the original pipeline implementation.

 > [!TIP]
-> The [`~DiffusionPipeline.from_pipe`] method is particularly useful for loading community pipelines because many of them don't have pretrained weights and add a feature on top of an existing pipeline like Stable Diffusion or Stable Diffusion XL. You can learn more about the [`~DiffusionPipeline.from_pipe`] method in the [Load with from_pipe](custom_pipeline_overview#load-with-from_pipe) section.
+> Check out the community pipelines in [diffusers/examples/community](https://github.com/huggingface/diffusers/tree/main/examples/community) with inference and training examples for how to use them.

-<hfoptions id="community">
-<hfoption id="Marigold">
+Community pipelines are stored either on the Hub or in the Diffusers GitHub repository. Hub pipelines are completely customizable (scheduler, models, pipeline code, etc.) while GitHub pipelines are limited to only the custom pipeline code. The table below compares the two community pipeline types further.

-[Marigold](https://marigoldmonodepth.github.io/) is a depth estimation diffusion pipeline that uses the rich existing and inherent visual knowledge in diffusion models. It takes an input image and denoises and decodes it into a depth map. Marigold performs well even on images it hasn't seen before.
+|  | GitHub | Hub |
+|---|---|---|
+| Usage | Same. | Same. |
+| Review process | Open a Pull Request on GitHub and undergo a review process from the Diffusers team before merging. This option is slower. | Upload directly to a Hub repository without a review. This is the fastest option. |
+| Visibility | Included in the official Diffusers repository and docs. | Included on your Hub profile and relies on your own usage and promotion to gain visibility. |
+
+## custom_pipeline
+
+Load either type of community pipeline by passing the `custom_pipeline` argument to [`~DiffusionPipeline.from_pretrained`].

 ```py
 import torch
-from PIL import Image
 from diffusers import DiffusionPipeline
-from diffusers.utils import load_image

 pipeline = DiffusionPipeline.from_pretrained(
-    "prs-eth/marigold-lcm-v1-0",
-    custom_pipeline="marigold_depth_estimation",
+    "stabilityai/stable-diffusion-3-medium-diffusers",
+    custom_pipeline="pipeline_stable_diffusion_3_instruct_pix2pix",
    torch_dtype=torch.float16,
-    variant="fp16",
+    device_map="cuda"
 )
-
-pipeline.to("cuda")
-image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/community-marigold.png")
-output = pipeline(
-    image,
-    denoising_steps=4,
-    ensemble_size=5,
-    processing_res=768,
-    match_input_res=True,
-    batch_size=0,
-    seed=33,
-    color_map="Spectral",
-    show_progress_bar=True,
-)
-depth_colored: Image.Image = output.depth_colored
-depth_colored.save("./depth_colored.png")
 ```

-<div class="flex flex-row gap-4">
-  <div class="flex-1">
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/community-marigold.png"/>
-    <figcaption class="mt-2 text-center text-sm text-gray-500">original image</figcaption>
-  </div>
-  <div class="flex-1">
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/marigold-depth.png"/>
-    <figcaption class="mt-2 text-center text-sm text-gray-500">colorized depth image</figcaption>
-  </div>
-</div>
-
-</hfoption>
-<hfoption id="HD-Painter">
-
-[HD-Painter](https://hf.co/papers/2312.14091) is a high-resolution inpainting pipeline. It introduces a *Prompt-Aware Introverted Attention (PAIntA)* layer to better align a prompt with the area to be inpainted, and *Reweighting Attention Score Guidance (RASG)* to keep the latents more prompt-aligned and within their trained domain to generate realistc images.
+Add the `custom_revision` argument to [`~DiffusionPipeline.from_pretrained`] to load a community pipeline from a specific version (for example, `v0.30.0` or `main`). By default, community pipelines are loaded from the latest stable version of Diffusers.

 ```py
 import torch
-from diffusers import DiffusionPipeline, DDIMScheduler
-from diffusers.utils import load_image
+from diffusers import DiffusionPipeline

 pipeline = DiffusionPipeline.from_pretrained(
-    "stable-diffusion-v1-5/stable-diffusion-v1-5-inpainting",
-    custom_pipeline="hd_painter"
+    "stabilityai/stable-diffusion-3-medium-diffusers",
+    custom_pipeline="pipeline_stable_diffusion_3_instruct_pix2pix",
+    custom_revision="main",
+    torch_dtype=torch.float16,
+    device_map="cuda"
 )
-pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
-init_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/hd-painter.jpg")
-mask_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/hd-painter-mask.png")
-prompt = "football"
-image = pipeline(prompt, init_image, mask_image, use_rasg=True, use_painta=True, generator=torch.manual_seed(0)).images[0]
-image
 ```

-<div class="flex flex-row gap-4">
-  <div class="flex-1">
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/hd-painter.jpg"/>
-    <figcaption class="mt-2 text-center text-sm text-gray-500">original image</figcaption>
-  </div>
-  <div class="flex-1">
-    <img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/hd-painter-output.png"/>
-    <figcaption class="mt-2 text-center text-sm text-gray-500">generated image</figcaption>
-  </div>
-</div>
+> [!WARNING]
+> While the Hugging Face Hub [scans](https://huggingface.co/docs/hub/security-malware) files, you should still inspect the Hub pipeline code and make sure it is safe.

-</hfoption>
-</hfoptions>
+There are a few ways to load a community pipeline.
+
+- Pass a path to `custom_pipeline` to load a local community pipeline. The directory must contain a `pipeline.py` file containing the pipeline class.
+
+  ```py
+  import torch
+  from diffusers import DiffusionPipeline
+
+  pipeline = DiffusionPipeline.from_pretrained(
+      "stabilityai/stable-diffusion-3-medium-diffusers",
+      custom_pipeline="path/to/pipeline_directory",
+      torch_dtype=torch.float16,
+      device_map="cuda"
+  )
+  ```
+
+- The `custom_pipeline` argument is also supported by [`~DiffusionPipeline.from_pipe`], which is useful for [reusing pipelines](./loading#reuse-a-pipeline) without using additional memory. It limits the memory usage to only the largest pipeline loaded.
+
+  ```py
+  import torch
+  from diffusers import DiffusionPipeline
+
+  pipeline_sd = DiffusionPipeline.from_pretrained("emilianJR/CyberRealistic_V3", torch_dtype=torch.float16, device_map="cuda")
+  pipeline_lpw = DiffusionPipeline.from_pipe(
+      pipeline_sd, custom_pipeline="lpw_stable_diffusion", device_map="cuda"
+  )
+  ```
+
+  The [`~DiffusionPipeline.from_pipe`] method is especially useful for loading community pipelines because many of them don't have pretrained weights. Community pipelines generally add a feature on top of an existing pipeline.

 ## Community components

-Community components allow users to build pipelines that may have customized components that are not a part of Diffusers. If your pipeline has custom components that Diffusers doesn't already support, you need to provide their implementations as Python modules. These customized components could be a VAE, UNet, and scheduler. In most cases, the text encoder is imported from the Transformers library. The pipeline code itself can also be customized.
+Community components let users build pipelines with custom transformers, UNets, VAEs, and schedulers not supported by Diffusers. These components require Python module implementations. 

-This section shows how users should use community components to build a community pipeline.
+This section shows how users can use community components to build a community pipeline using [showlab/show-1-base](https://huggingface.co/showlab/show-1-base) as an example.

-You'll use the [showlab/show-1-base](https://huggingface.co/showlab/show-1-base) pipeline checkpoint as an example.
-
-1. Import and load the text encoder from Transformers:
-
-```python
-from transformers import T5Tokenizer, T5EncoderModel
-
-pipe_id = "showlab/show-1-base"
-tokenizer = T5Tokenizer.from_pretrained(pipe_id, subfolder="tokenizer")
-text_encoder = T5EncoderModel.from_pretrained(pipe_id, subfolder="text_encoder")
-```
-
-2. Load a scheduler:
+1. Load the required components: the tokenizer, text encoder, scheduler, and image processor. The text encoder is generally imported from [Transformers](https://huggingface.co/docs/transformers/index).

 ```python
+from transformers import T5Tokenizer, T5EncoderModel, CLIPImageProcessor
 from diffusers import DPMSolverMultistepScheduler

+pipeline_id = "showlab/show-1-base"
+tokenizer = T5Tokenizer.from_pretrained(pipeline_id, subfolder="tokenizer")
+text_encoder = T5EncoderModel.from_pretrained(pipeline_id, subfolder="text_encoder")
 scheduler = DPMSolverMultistepScheduler.from_pretrained(pipeline_id, subfolder="scheduler")
-```
-
-3. Load an image processor:
-
-```python
-from transformers import CLIPImageProcessor
-
 feature_extractor = CLIPImageProcessor.from_pretrained(pipeline_id, subfolder="feature_extractor")
 ```

-<Tip warning={true}>
+> [!WARNING]
+> In steps 2 and 3, the custom [UNet](https://huggingface.co/sayakpaul/show-1-base-with-code/blob/main/unet/showone_unet_3d_condition.py) and [pipeline](https://huggingface.co/sayakpaul/show-1-base-with-code/blob/main/pipeline_t2v_base_pixel.py) implementations must match the format shown in their files for this example to work.

-In steps 4 and 5, the custom [UNet](https://github.com/showlab/Show-1/blob/main/showone/models/unet_3d_condition.py) and [pipeline](https://huggingface.co/sayakpaul/show-1-base-with-code/blob/main/unet/showone_unet_3d_condition.py) implementation must match the format shown in their files for this example to work.
-
-</Tip>
-
-4. Now you'll load a [custom UNet](https://github.com/showlab/Show-1/blob/main/showone/models/unet_3d_condition.py), which in this example, has already been implemented in [showone_unet_3d_condition.py](https://huggingface.co/sayakpaul/show-1-base-with-code/blob/main/unet/showone_unet_3d_condition.py) for your convenience. You'll notice the [`UNet3DConditionModel`] class name is changed to `ShowOneUNet3DConditionModel` because [`UNet3DConditionModel`] already exists in Diffusers. Any components needed for the `ShowOneUNet3DConditionModel` class should be placed in showone_unet_3d_condition.py.
-
-    Once this is done, you can initialize the UNet:
-
-    ```python
-    from showone_unet_3d_condition import ShowOneUNet3DConditionModel
-
-    unet = ShowOneUNet3DConditionModel.from_pretrained(pipe_id, subfolder="unet")
-    ```
-
-5. Finally, you'll load the custom pipeline code. For this example, it has already been created for you in [pipeline_t2v_base_pixel.py](https://huggingface.co/sayakpaul/show-1-base-with-code/blob/main/pipeline_t2v_base_pixel.py). This script contains a custom `TextToVideoIFPipeline` class for generating videos from text. Just like the custom UNet, any code needed for the custom pipeline to work should go in pipeline_t2v_base_pixel.py.
-
-Once everything is in place, you can initialize the `TextToVideoIFPipeline` with the `ShowOneUNet3DConditionModel`:
+2. Load a [custom UNet](https://github.com/showlab/Show-1/blob/main/showone/models/unet_3d_condition.py) which is already implemented in [showone_unet_3d_condition.py](https://huggingface.co/sayakpaul/show-1-base-with-code/blob/main/unet/showone_unet_3d_condition.py). The [`UNet3DConditionModel`] class name is renamed to the custom implementation, `ShowOneUNet3DConditionModel`, because [`UNet3DConditionModel`] already exists in Diffusers. Any components required for the `ShowOneUNet3DConditionModel` class should be placed in `showone_unet_3d_condition.py`.
+
+```python
+from showone_unet_3d_condition import ShowOneUNet3DConditionModel
+
+unet = ShowOneUNet3DConditionModel.from_pretrained(pipeline_id, subfolder="unet")
+```
+
+3. Load the custom pipeline code (already implemented in [pipeline_t2v_base_pixel.py](https://huggingface.co/sayakpaul/show-1-base-with-code/blob/main/pipeline_t2v_base_pixel.py)). This script contains a custom `TextToVideoIFPipeline` class for generating videos from text. Like the custom UNet, any code required for `TextToVideoIFPipeline` should be placed in `pipeline_t2v_base_pixel.py`.
+
+Initialize `TextToVideoIFPipeline` with `ShowOneUNet3DConditionModel`.

 ```python
-from pipeline_t2v_base_pixel import TextToVideoIFPipeline
 import torch
+from pipeline_t2v_base_pixel import TextToVideoIFPipeline

 pipeline = TextToVideoIFPipeline(
    unet=unet,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    scheduler=scheduler,
-    feature_extractor=feature_extractor
+    feature_extractor=feature_extractor,
+    device_map="cuda",
+    torch_dtype=torch.float16
 )
-pipeline = pipeline.to(device="cuda")
-pipeline.torch_dtype = torch.float16
 ```

-Push the pipeline to the Hub to share with the community!
+4. Push the pipeline to the Hub to share with the community.

 ```python
 pipeline.push_to_hub("custom-t2v-pipeline")
 ```

-After the pipeline is successfully pushed, you need to make a few changes:
+After the pipeline is successfully pushed, make the following changes.

-1. Change the `_class_name` attribute in [model_index.json](https://huggingface.co/sayakpaul/show-1-base-with-code/blob/main/model_index.json#L2) to `"pipeline_t2v_base_pixel"` and `"TextToVideoIFPipeline"`.
-2. Upload `showone_unet_3d_condition.py` to the [unet](https://huggingface.co/sayakpaul/show-1-base-with-code/blob/main/unet/showone_unet_3d_condition.py) subfolder.
-3. Upload `pipeline_t2v_base_pixel.py` to the pipeline [repository](https://huggingface.co/sayakpaul/show-1-base-with-code/tree/main).
+- Change the `_class_name` attribute in [model_index.json](https://huggingface.co/sayakpaul/show-1-base-with-code/blob/main/model_index.json#L2) to `"pipeline_t2v_base_pixel"` and `"TextToVideoIFPipeline"`.
+- Upload `showone_unet_3d_condition.py` to the [unet](https://huggingface.co/sayakpaul/show-1-base-with-code/blob/main/unet/showone_unet_3d_condition.py) subfolder.
+- Upload `pipeline_t2v_base_pixel.py` to the pipeline [repository](https://huggingface.co/sayakpaul/show-1-base-with-code/tree/main).

 To run inference, add the `trust_remote_code` argument while initializing the pipeline to handle all the "magic" behind the scenes.

-> [!WARNING]
-> As an additional precaution with `trust_remote_code=True`, we strongly encourage you to pass a commit hash to the `revision` parameter in [`~DiffusionPipeline.from_pretrained`] to make sure the code hasn't been updated with some malicious new lines of code (unless you fully trust the model owners).
-
 ```python
-from diffusers import DiffusionPipeline
 import torch
+from diffusers import DiffusionPipeline

 pipeline = DiffusionPipeline.from_pretrained(
    "<change-username>/<change-id>", trust_remote_code=True, torch_dtype=torch.float16
-).to("cuda")
-
-prompt = "hello"
-
-# Text embeds
-prompt_embeds, negative_embeds = pipeline.encode_prompt(prompt)
-
-# Keyframes generation (8x64x40, 2fps)
-video_frames = pipeline(
-    prompt_embeds=prompt_embeds,
-    negative_prompt_embeds=negative_embeds,
-    num_frames=8,
-    height=40,
-    width=64,
-    num_inference_steps=2,
-    guidance_scale=9.0,
-    output_type="pt"
-).frames
-```
-
-As an additional reference, take a look at the repository structure of [stabilityai/japanese-stable-diffusion-xl](https://huggingface.co/stabilityai/japanese-stable-diffusion-xl/) which also uses the `trust_remote_code` feature.
-
-```python
-from diffusers import DiffusionPipeline
-import torch
-
-pipeline = DiffusionPipeline.from_pretrained(
-    "stabilityai/japanese-stable-diffusion-xl", trust_remote_code=True
 )
-pipeline.to("cuda")
 ```
+
+> [!WARNING]
+> As an additional precaution with `trust_remote_code=True`, we strongly encourage passing a commit hash to the `revision` argument in [`~DiffusionPipeline.from_pretrained`] to make sure the code hasn't been updated with new malicious code (unless you fully trust the model owners).
+
+## Resources
+
+- Take a look at Issue [#841](https://github.com/huggingface/diffusers/issues/841) for more context about why we're adding community pipelines to help everyone easily share their work without being slowed down.
+- Check out the [stabilityai/japanese-stable-diffusion-xl](https://huggingface.co/stabilityai/japanese-stable-diffusion-xl/) repository for an additional example of a community pipeline that also uses the `trust_remote_code` feature.
@@ -10,13 +10,7 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
 specific language governing permissions and limitations under the License.
 -->

-# Controlling image quality
-
-The components of a diffusion model, like the UNet and scheduler, can be optimized to improve the quality of generated images leading to better details. These techniques are especially useful if you don't have the resources to simply use a larger model for inference. You can enable these techniques during inference without any additional training.
-
-This guide will show you how to turn these techniques on in your pipeline and how to configure them to improve the quality of your generated images.
-
-## Details
+# FreeU

 [FreeU](https://hf.co/papers/2309.11497) improves image details by rebalancing the UNet's backbone and skip connection weights. The skip connections can cause the model to overlook some of the backbone semantics which may lead to unnatural image details in the generated image. This technique does not require any additional training and can be applied on the fly during inference for tasks like image-to-image and text-to-video.

@@ -139,7 +133,7 @@ export_to_video(video_frames, "teddy_bear.mp4", fps=10)
 </hfoption>
 </hfoptions>

-Call the [`pipelines.StableDiffusionMixin.disable_freeu`] method to disable FreeU.
+Call the [`~pipelines.StableDiffusionMixin.disable_freeu`] method to disable FreeU.

 ```py
 pipeline.disable_freeu()
@@ -10,116 +10,143 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
 specific language governing permissions and limitations under the License.
 -->

-# Load pipelines
-
 [[open-in-colab]]

-Diffusion systems consist of multiple components like parameterized models and schedulers that interact in complex ways. That is why we designed the [`DiffusionPipeline`] to wrap the complexity of the entire diffusion system into an easy-to-use API. At the same time, the [`DiffusionPipeline`] is entirely customizable so you can modify each component to build a diffusion system for your use case.
+# DiffusionPipeline

-This guide will show you how to load:
+Diffusion models consist of multiple components like UNets or diffusion transformers (DiTs), text encoders, variational autoencoders (VAEs), and schedulers. The [`DiffusionPipeline`] wraps all of these components into a single easy-to-use API without giving up the flexibility to modify its components.

- pipelines from the Hub and locally
- different components into a pipeline
- multiple pipelines without increasing memory usage
- checkpoint variants such as different floating point types or non-exponential mean averaged (EMA) weights
+This guide will show you how to load a [`DiffusionPipeline`].

-## Load a pipeline
+## Loading a pipeline

-> [!TIP]
-> Skip to the [DiffusionPipeline explained](#diffusionpipeline-explained) section if you're interested in an explanation about how the [`DiffusionPipeline`] class works.
+[`DiffusionPipeline`] is a base pipeline class that automatically selects and returns an instance of a model's pipeline subclass, like [`QwenImagePipeline`], by scanning the `model_index.json` file for the class name.

-There are two ways to load a pipeline for a task:
-
-1. Load the generic [`DiffusionPipeline`] class and allow it to automatically detect the correct pipeline class from the checkpoint.
-2. Load a specific pipeline class for a specific task.
-
-<hfoptions id="pipelines">
-<hfoption id="generic pipeline">
-
-The [`DiffusionPipeline`] class is a simple and generic way to load the latest trending diffusion model from the [Hub](https://huggingface.co/models?library=diffusers&sort=trending). It uses the [`~DiffusionPipeline.from_pretrained`] method to automatically detect the correct pipeline class for a task from the checkpoint, downloads and caches all the required configuration and weight files, and returns a pipeline ready for inference.
-
-```python
-from diffusers import DiffusionPipeline
-
-pipeline = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", use_safetensors=True)
-```
-
-This same checkpoint can also be used for an image-to-image task. The [`DiffusionPipeline`] class can handle any task as long as you provide the appropriate inputs. For example, for an image-to-image task, you need to pass an initial image to the pipeline.
+Pass a model id to [`~DiffusionPipeline.from_pretrained`] to load a pipeline.

 ```py
-from diffusers import DiffusionPipeline
-
-pipeline = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", use_safetensors=True)
-
-init_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-init.png")
-prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
-image = pipeline("Astronaut in a jungle, cold color palette, muted colors, detailed, 8k", image=init_image).images[0]
-```
-
-</hfoption>
-<hfoption id="specific pipeline">
-
-Checkpoints can be loaded by their specific pipeline class if you already know it. For example, to load a Stable Diffusion model, use the [`StableDiffusionPipeline`] class.
-
-```python
-from diffusers import StableDiffusionPipeline
-
-pipeline = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", use_safetensors=True)
-```
-
-This same checkpoint may also be used for another task like image-to-image. To differentiate what task you want to use the checkpoint for, you have to use the corresponding task-specific pipeline class. For example, to use the same checkpoint for image-to-image, use the [`StableDiffusionImg2ImgPipeline`] class.
-
-```py
-from diffusers import StableDiffusionImg2ImgPipeline
-
-pipeline = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", use_safetensors=True)
-```
-
-</hfoption>
-</hfoptions>
-
-Use the Space below to gauge a pipeline's memory requirements before you download and load it to see if it runs on your hardware.
-
-<div class="block dark:hidden">
-	<iframe
-        src="https://diffusers-compute-pipeline-size.hf.space?__theme=light"
-        width="850"
-        height="1600"
-    ></iframe>
-</div>
-<div class="hidden dark:block">
-    <iframe
-        src="https://diffusers-compute-pipeline-size.hf.space?__theme=dark"
-        width="850"
-        height="1600"
-    ></iframe>
-</div>
-
-### Specifying Component-Specific Data Types
-
-You can customize the data types for individual sub-models by passing a dictionary to the `torch_dtype` parameter. This allows you to load different components of a pipeline in different floating point precisions. For instance, if you want to load the transformer with `torch.bfloat16` and all other components with `torch.float16`, you can pass a dictionary mapping:
-
-```python
-from diffusers import HunyuanVideoPipeline
 import torch
+from diffusers import DiffusionPipeline

-pipe = HunyuanVideoPipeline.from_pretrained(
-    "hunyuanvideo-community/HunyuanVideo",
-    torch_dtype={"transformer": torch.bfloat16, "default": torch.float16},
+pipeline = DiffusionPipeline.from_pretrained(
+  "Qwen/Qwen-Image", torch_dtype=torch.bfloat16, device_map="cuda"
 )
-print(pipe.transformer.dtype, pipe.vae.dtype)  # (torch.bfloat16, torch.float16)
 ```

-If a component is not explicitly specified in the dictionary and no `default` is provided, it will be loaded with `torch.float32`.
+Every model has a specific pipeline subclass that inherits from [`DiffusionPipeline`]. A subclass usually has a narrow focus and is task-specific. See the table below for an example.

-### Parallel loading
+| pipeline subclass | task |
+|---|---|
+| [`QwenImagePipeline`] | text-to-image |
+| [`QwenImageImg2ImgPipeline`] | image-to-image |
+| [`QwenImageInpaintPipeline`] | inpaint |
+
+You could use the subclass directly by passing a model id to [`~QwenImagePipeline.from_pretrained`].
+
+```py
+import torch
+from diffusers import QwenImagePipeline
+
+pipeline = QwenImagePipeline.from_pretrained(
+  "Qwen/Qwen-Image", torch_dtype=torch.bfloat16, device_map="cuda"
+)
+```
+
+### Local pipelines
+
+Pipelines can also be run locally. Use [`~huggingface_hub.snapshot_download`] to download a model repository.
+
+```py
+from huggingface_hub import snapshot_download
+
+snapshot_download(repo_id="Qwen/Qwen-Image")
+```
+
+The model is downloaded to your [cache](../installation#cache). Pass the folder path to [`~QwenImagePipeline.from_pretrained`] to load it.
+
+```py
+import torch
+from diffusers import QwenImagePipeline
+
+pipeline = QwenImagePipeline.from_pretrained(
+  "path/to/your/cache", torch_dtype=torch.bfloat16, device_map="cuda"
+)
+```
+
+The [`~QwenImagePipeline.from_pretrained`] method won't download files from the Hub when it detects a local path. But this also means it won't download and cache any updates that have been made to the model either.
+
+## Pipeline data types
+
+Use the `torch_dtype` argument in [`~DiffusionPipeline.from_pretrained`] to load a model with a specific data type. This allows you to load different models in different precisions. For example, loading a large transformer model in half-precision reduces the memory required.
+
+Pass the data type for each model as a dictionary to `torch_dtype`. Use the `default` key to set the default data type. If a model isn't in the dictionary and `default` isn't provided, it is loaded in full precision (`torch.float32`).
+
+```py
+import torch
+from diffusers import QwenImagePipeline
+
+pipeline = QwenImagePipeline.from_pretrained(
+  "Qwen/Qwen-Image",
+  torch_dtype={"transformer": torch.bfloat16, "default": torch.float16},
+)
+print(pipeline.transformer.dtype, pipeline.vae.dtype)
+```
+
+You don't need to use a dictionary if you're loading all the models in the same data type.
+
+```py
+import torch
+from diffusers import QwenImagePipeline
+
+pipeline = QwenImagePipeline.from_pretrained(
+  "Qwen/Qwen-Image", torch_dtype=torch.bfloat16
+)
+print(pipeline.transformer.dtype, pipeline.vae.dtype)
+```
+
+## Device placement
+
+The `device_map` argument determines individual model or pipeline placement on an accelerator like a GPU. It is especially helpful when there are multiple GPUs.
+
+A pipeline supports two options for `device_map`, `"cuda"` and `"balanced"`. Refer to the table below to compare the placement strategies.
+
+| parameter | description |
+|---|---|
+| `"cuda"` | places pipeline on a supported accelerator device like CUDA |
+| `"balanced"` | evenly distributes pipeline on all GPUs |
+
+Use the `max_memory` argument in [`~DiffusionPipeline.from_pretrained`] to allocate a maximum amount of memory to use on each device. By default, Diffusers uses the maximum amount available.
+
+```py
+import torch
+from diffusers import DiffusionPipeline
+
+max_memory = {0: "16GB", 1: "16GB"}
+pipeline = DiffusionPipeline.from_pretrained(
+  "Qwen/Qwen-Image",
+  torch_dtype=torch.bfloat16,
+  device_map="balanced",
+  max_memory=max_memory,
+)
+```
+
+The `hf_device_map` attribute allows you to access and view the `device_map`.
+
+```py
+print(pipeline.hf_device_map)
+# {'unet': 1, 'vae': 1, 'safety_checker': 0, 'text_encoder': 0}
+```
+
+Reset a pipeline's `device_map` with the [`~DiffusionPipeline.reset_device_map`] method. This is necessary if you want to use methods such as `.to()`, [`~DiffusionPipeline.enable_sequential_cpu_offload`], and [`~DiffusionPipeline.enable_model_cpu_offload`].
+
+```py
+pipeline.reset_device_map()
+```
+
+## Parallel loading

 Large models are often [sharded](../training/distributed_inference#model-sharding) into smaller files so that they are easier to load. Diffusers supports loading shards in parallel to speed up the loading process.

-Set the environment variables below to enable parallel loading.
-
- Set `HF_ENABLE_PARALLEL_LOADING` to `"YES"` to enable parallel loading of shards.
- Set `HF_PARALLEL_LOADING_WORKERS` to configure the number of parallel threads to use when loading shards. More workers loads a model faster but uses more memory.
+Set `HF_ENABLE_PARALLEL_LOADING` to `"YES"` to enable parallel loading of shards.

 The `device_map` argument should be set to `"cuda"` to pre-allocate a large chunk of memory based on the model size. This substantially reduces model load time because warming up the memory allocator now avoids many smaller calls to the allocator later.

@@ -129,479 +156,94 @@ import torch
 from diffusers import DiffusionPipeline

 os.environ["HF_ENABLE_PARALLEL_LOADING"] = "YES"
+
 pipeline = DiffusionPipeline.from_pretrained(
-    "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
-    torch_dtype=torch.bfloat16,
-    device_map="cuda"
+  "Wan-AI/Wan2.2-I2V-A14B-Diffusers", torch_dtype=torch.bfloat16, device_map="cuda"
 )
 ```

-### Local pipeline
+## Replacing models in a pipeline

-To load a pipeline locally, use [git-lfs](https://git-lfs.github.com/) to manually download a checkpoint to your local disk.
+[`DiffusionPipeline`] is flexible and accommodates loading different models or schedulers. You can experiment with different schedulers to optimize for generation speed or quality, and you can replace models with more performant ones.

-```bash
-git-lfs install
-git clone https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5
-```
-
-This creates a local folder, ./stable-diffusion-v1-5, on your disk and you should pass its path to [`~DiffusionPipeline.from_pretrained`].
-
-```python
-from diffusers import DiffusionPipeline
-
-stable_diffusion = DiffusionPipeline.from_pretrained("./stable-diffusion-v1-5", use_safetensors=True)
-```
-
-The [`~DiffusionPipeline.from_pretrained`] method won't download files from the Hub when it detects a local path, but this also means it won't download and cache the latest changes to a checkpoint.
-
-## Customize a pipeline
-
-You can customize a pipeline by loading different components into it. This is important because you can:
-
- change to a scheduler with faster generation speed or higher generation quality depending on your needs (call the `scheduler.compatibles` method on your pipeline to see compatible schedulers)
- change a default pipeline component to a newer and better performing one
-
-For example, let's customize the default [stabilityai/stable-diffusion-xl-base-1.0](https://hf.co/stabilityai/stable-diffusion-xl-base-1.0) checkpoint with:
-
- The [`HeunDiscreteScheduler`] to generate higher quality images at the expense of slower generation speed. You must pass the `subfolder="scheduler"` parameter in [`~HeunDiscreteScheduler.from_pretrained`] to load the scheduler configuration into the correct [subfolder](https://hf.co/stabilityai/stable-diffusion-xl-base-1.0/tree/main/scheduler) of the pipeline repository.
- A more stable VAE that runs in fp16.
+The example below uses a more stable VAE version.

 ```py
-from diffusers import StableDiffusionXLPipeline, HeunDiscreteScheduler, AutoencoderKL
 import torch
+from diffusers import DiffusionPipeline, AutoModel

-scheduler = HeunDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")
-vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16, use_safetensors=True)
-```
+vae = AutoModel.from_pretrained(
+  "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
+)

-Now pass the new scheduler and VAE to the [`StableDiffusionXLPipeline`].
-
-```py
-pipeline = StableDiffusionXLPipeline.from_pretrained(
+pipeline = DiffusionPipeline.from_pretrained(
  "stabilityai/stable-diffusion-xl-base-1.0",
-  scheduler=scheduler,
  vae=vae,
  torch_dtype=torch.float16,
-  variant="fp16",
-  use_safetensors=True
-).to("cuda")
+  device_map="cuda"
+)
 ```

-## Reuse a pipeline
+## Reusing models in multiple pipelines

-When you load multiple pipelines that share the same model components, it makes sense to reuse the shared components instead of reloading everything into memory again, especially if your hardware is memory-constrained. For example:
+When working with multiple pipelines that use the same model, the [`~DiffusionPipeline.from_pipe`] method enables reusing a model instead of reloading it each time. This allows you to use multiple pipelines without increasing memory usage.

-1. You generated an image with the [`StableDiffusionPipeline`] but you want to improve its quality with the [`StableDiffusionSAGPipeline`]. Both of these pipelines share the same pretrained model, so it'd be a waste of memory to load the same model twice.
-2. You want to add a model component, like a [`MotionAdapter`](../api/pipelines/animatediff#animatediffpipeline), to [`AnimateDiffPipeline`] which was instantiated from an existing [`StableDiffusionPipeline`]. Again, both pipelines share the same pretrained model, so it'd be a waste of memory to load an entirely new pipeline again.
+Memory usage is determined by the pipeline with the highest memory requirement regardless of the number of pipelines.

-With the [`DiffusionPipeline.from_pipe`] API, you can switch between multiple pipelines to take advantage of their different features without increasing memory-usage. It is similar to turning on and off a feature in your pipeline.
-
-> [!TIP]
-> To switch between tasks (rather than features), use the [`~DiffusionPipeline.from_pipe`] method with the [AutoPipeline](../api/pipelines/auto_pipeline) class, which automatically identifies the pipeline class based on the task (learn more in the [AutoPipeline](../tutorials/autopipeline) tutorial).
-
-Let's start with a [`StableDiffusionPipeline`] and then reuse the loaded model components to create a [`StableDiffusionSAGPipeline`] to increase generation quality. You'll use the [`StableDiffusionPipeline`] with an [IP-Adapter](./ip_adapter) to generate a bear eating pizza.
-
-```python
-from diffusers import DiffusionPipeline, StableDiffusionSAGPipeline
-import torch
-import gc
-from diffusers.utils import load_image
-from accelerate.utils import compute_module_sizes
-
-image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/load_neg_embed.png")
-
-pipe_sd = DiffusionPipeline.from_pretrained("SG161222/Realistic_Vision_V6.0_B1_noVAE", torch_dtype=torch.float16)
-pipe_sd.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
-pipe_sd.set_ip_adapter_scale(0.6)
-pipe_sd.to("cuda")
-
-generator = torch.Generator(device="cpu").manual_seed(33)
-out_sd = pipe_sd(
-    prompt="bear eats pizza",
-    negative_prompt="wrong white balance, dark, sketches,worst quality,low quality",
-    ip_adapter_image=image,
-    num_inference_steps=50,
-    generator=generator,
-).images[0]
-out_sd
-```
-
-<div class="flex justify-center">
-  <img class="rounded-xl" src="https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/from_pipe_out_sd_0.png"/>
-</div>
-
-For reference, you can check how much memory this process consumed.
-
-```python
-def bytes_to_giga_bytes(bytes):
-    return bytes / 1024 / 1024 / 1024
-print(f"Max memory allocated: {bytes_to_giga_bytes(torch.cuda.max_memory_allocated())} GB")
-"Max memory allocated: 4.406213283538818 GB"
-```
-
-Now, reuse the same pipeline components from [`StableDiffusionPipeline`] in [`StableDiffusionSAGPipeline`] with the [`~DiffusionPipeline.from_pipe`] method.
+The example below loads a pipeline and then loads a second pipeline with [`~DiffusionPipeline.from_pipe`] to use [perturbed-attention guidance (PAG)](../api/pipelines/pag) to improve generation quality.

 > [!WARNING]
-> Some pipeline methods may not function properly on new pipelines created with [`~DiffusionPipeline.from_pipe`]. For instance, the [`~DiffusionPipeline.enable_model_cpu_offload`] method installs hooks on the model components based on a unique offloading sequence for each pipeline. If the models are executed in a different order in the new pipeline, the CPU offloading may not work correctly.
->
-> To ensure everything works as expected, we recommend re-applying a pipeline method on a new pipeline created with [`~DiffusionPipeline.from_pipe`].
+> Use [`AutoPipelineForText2Image`] because [`DiffusionPipeline`] doesn't support PAG. Refer to the [AutoPipeline](../tutorials/autopipeline) docs to learn more. 

-```python
-pipe_sag = StableDiffusionSAGPipeline.from_pipe(
-    pipe_sd
+```py
+import torch
+from diffusers import AutoPipelineForText2Image
+
+pipeline_sdxl = AutoPipelineForText2Image.from_pretrained(
+  "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, device_map="cuda"
 )
-
-generator = torch.Generator(device="cpu").manual_seed(33)
-out_sag = pipe_sag(
-    prompt="bear eats pizza",
-    negative_prompt="wrong white balance, dark, sketches,worst quality,low quality",
-    ip_adapter_image=image,
-    num_inference_steps=50,
-    generator=generator,
-    guidance_scale=1.0,
-    sag_scale=0.75
-).images[0]
-out_sag
+prompt = """
+cinematic film still of a cat sipping a margarita in a pool in Palm Springs, California
+highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain
+"""
+image = pipeline_sdxl(prompt).images[0]
+print(f"Max memory allocated: {torch.cuda.max_memory_allocated() / 1024**3:.2f} GB")
+# Max memory allocated: 10.47 GB
 ```

-<div class="flex justify-center">
-  <img class="rounded-xl" src="https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/from_pipe_out_sag_1.png"/>
-</div>
-
-If you check the memory usage, you'll see it remains the same as before because [`StableDiffusionPipeline`] and [`StableDiffusionSAGPipeline`] are sharing the same pipeline components. This allows you to use them interchangeably without any additional memory overhead.
+Set `enable_pag=True` in the second pipeline to enable PAG. The second pipeline uses the same amount of memory because it shares model weights with the first one.

 ```py
-print(f"Max memory allocated: {bytes_to_giga_bytes(torch.cuda.max_memory_allocated())} GB")
-"Max memory allocated: 4.406213283538818 GB"
+pipeline = AutoPipelineForText2Image.from_pipe(
+  pipeline_sdxl, enable_pag=True
+)
+prompt = """
+cinematic film still of a cat sipping a margarita in a pool in Palm Springs, California
+highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain
+"""
+image = pipeline(prompt).images[0]
+print(f"Max memory allocated: {torch.cuda.max_memory_allocated() / 1024**3:.2f} GB")
+# Max memory allocated: 10.47 GB
 ```

-Let's animate the image with the [`AnimateDiffPipeline`] and also add a [`MotionAdapter`] module to the pipeline. For the [`AnimateDiffPipeline`], you need to unload the IP-Adapter first and reload it *after* you've created your new pipeline (this only applies to the [`AnimateDiffPipeline`]).
+> [!WARNING]
+> Pipelines created by [`~DiffusionPipeline.from_pipe`] share the same models and *state*. Modifying the state of a model in one pipeline affects all the other pipelines that share the same model.

-```py
-from diffusers import AnimateDiffPipeline, MotionAdapter, DDIMScheduler
-from diffusers.utils import export_to_gif
-
-pipe_sag.unload_ip_adapter()
-adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2", torch_dtype=torch.float16)
-
-pipe_animate = AnimateDiffPipeline.from_pipe(pipe_sd, motion_adapter=adapter)
-pipe_animate.scheduler = DDIMScheduler.from_config(pipe_animate.scheduler.config, beta_schedule="linear")
-# load IP-Adapter and LoRA weights again
-pipe_animate.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
-pipe_animate.load_lora_weights("guoyww/animatediff-motion-lora-zoom-out", adapter_name="zoom-out")
-pipe_animate.to("cuda")
-
-generator = torch.Generator(device="cpu").manual_seed(33)
-pipe_animate.set_adapters("zoom-out", adapter_weights=0.75)
-out = pipe_animate(
-    prompt="bear eats pizza",
-    num_frames=16,
-    num_inference_steps=50,
-    ip_adapter_image=image,
-    generator=generator,
-).frames[0]
-export_to_gif(out, "out_animate.gif")
-```
-
-<div class="flex justify-center">
-  <img class="rounded-xl" src="https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/from_pipe_out_animate_3.gif"/>
-</div>
-
-The [`AnimateDiffPipeline`] is more memory-intensive and consumes 15GB of memory (see the [Memory-usage of from_pipe](#memory-usage-of-from_pipe) section to learn what this means for your memory-usage).
-
-```py
-print(f"Max memory allocated: {bytes_to_giga_bytes(torch.cuda.max_memory_allocated())} GB")
-"Max memory allocated: 15.178664207458496 GB"
-```
-
-### Modify from_pipe components
-
-Pipelines loaded with [`~DiffusionPipeline.from_pipe`] can be customized with different model components or methods. However, whenever you modify the *state* of the model components, it affects all the other pipelines that share the same components. For example, if you call [`~diffusers.loaders.IPAdapterMixin.unload_ip_adapter`] on the [`StableDiffusionSAGPipeline`], you won't be able to use IP-Adapter with the [`StableDiffusionPipeline`] because it's been removed from their shared components.
-
-```py
-pipe.sag_unload_ip_adapter()
-
-generator = torch.Generator(device="cpu").manual_seed(33)
-out_sd = pipe_sd(
-    prompt="bear eats pizza",
-    negative_prompt="wrong white balance, dark, sketches,worst quality,low quality",
-    ip_adapter_image=image,
-    num_inference_steps=50,
-    generator=generator,
-).images[0]
-"AttributeError: 'NoneType' object has no attribute 'image_projection_layers'"
-```
-
-### Memory usage of from_pipe
-
-The memory requirement of loading multiple pipelines with [`~DiffusionPipeline.from_pipe`] is determined by the pipeline with the highest memory-usage regardless of the number of pipelines you create.
-
-| Pipeline | Memory usage (GB) |
-|---|---|
-| StableDiffusionPipeline | 4.400 |
-| StableDiffusionSAGPipeline | 4.400 |
-| AnimateDiffPipeline | 15.178 |
-
-The [`AnimateDiffPipeline`] has the highest memory requirement, so the *total memory-usage* is based only on the [`AnimateDiffPipeline`]. Your memory-usage will not increase if you create additional pipelines as long as their memory requirements doesn't exceed that of the [`AnimateDiffPipeline`]. Each pipeline can be used interchangeably without any additional memory overhead.
+Some methods may not work correctly on pipelines created with [`~DiffusionPipeline.from_pipe`]. For example, [`~DiffusionPipeline.enable_model_cpu_offload`] relies on a unique model execution order, which may differ in the new pipeline. To ensure proper functionality, reapply these methods on the new pipeline.

 ## Safety checker

-Diffusers implements a [safety checker](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/safety_checker.py) for Stable Diffusion models which can generate harmful content. The safety checker screens the generated output against known hardcoded not-safe-for-work (NSFW) content. If for whatever reason you'd like to disable the safety checker, pass `safety_checker=None` to the [`~DiffusionPipeline.from_pretrained`] method.
+Diffusers provides a [safety checker](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/safety_checker.py) for older Stable Diffusion models to prevent generating harmful content. It screens the generated output against a set of hardcoded harmful concepts.

-```python
+If you want to disable the safety checker, pass `safety_checker=None` in [`~DiffusionPipeline.from_pretrained`] as shown below.
+
+```py
 from diffusers import DiffusionPipeline

-pipeline = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, use_safetensors=True)
+pipeline = DiffusionPipeline.from_pretrained(
+  "stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
+)
 """
 You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide by the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend keeping the safety filter enabled in all public-facing circumstances, disabling it only for use cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .
 """
-```
-
-## Checkpoint variants
-
-A checkpoint variant is usually a checkpoint whose weights are:
-
- Stored in a different floating point type, such as [torch.float16](https://pytorch.org/docs/stable/tensors.html#data-types), because it only requires half the bandwidth and storage to download. You can't use this variant if you're continuing training or using a CPU.
- Non-exponential mean averaged (EMA) weights which shouldn't be used for inference. You should use this variant to continue finetuning a model.
-
-> [!TIP]
-> When the checkpoints have identical model structures, but they were trained on different datasets and with a different training setup, they should be stored in separate repositories. For example, [stabilityai/stable-diffusion-2](https://hf.co/stabilityai/stable-diffusion-2) and [stabilityai/stable-diffusion-2-1](https://hf.co/stabilityai/stable-diffusion-2-1) are stored in separate repositories.
-
-Otherwise, a variant is **identical** to the original checkpoint. They have exactly the same serialization format (like [safetensors](./using_safetensors)), model structure, and their weights have identical tensor shapes.
-
-| **checkpoint type** | **weight name**                             | **argument for loading weights** |
-|---------------------|---------------------------------------------|----------------------------------|
-| original            | diffusion_pytorch_model.safetensors         |                                  |
-| floating point      | diffusion_pytorch_model.fp16.safetensors    | `variant`, `torch_dtype`         |
-| non-EMA             | diffusion_pytorch_model.non_ema.safetensors | `variant`                        |
-
-There are two important arguments for loading variants:
-
- `torch_dtype` specifies the floating point precision of the loaded checkpoint. For example, if you want to save bandwidth by loading a fp16 variant, you should set `variant="fp16"` and `torch_dtype=torch.float16` to *convert the weights* to fp16. Otherwise, the fp16 weights are converted to the default fp32 precision.
-
-  If you only set `torch_dtype=torch.float16`, the default fp32 weights are downloaded first and then converted to fp16.
-
- `variant` specifies which files should be loaded from the repository. For example, if you want to load a non-EMA variant of a UNet from [stable-diffusion-v1-5/stable-diffusion-v1-5](https://hf.co/stable-diffusion-v1-5/stable-diffusion-v1-5/tree/main/unet), set `variant="non_ema"` to download the `non_ema` file.
-
-<hfoptions id="variants">
-<hfoption id="fp16">
-
-```py
-from diffusers import DiffusionPipeline
-import torch
-
-pipeline = DiffusionPipeline.from_pretrained(
-    "stable-diffusion-v1-5/stable-diffusion-v1-5", variant="fp16", torch_dtype=torch.float16, use_safetensors=True
-)
-```
-
-</hfoption>
-<hfoption id="non-EMA">
-
-```py
-pipeline = DiffusionPipeline.from_pretrained(
-    "stable-diffusion-v1-5/stable-diffusion-v1-5", variant="non_ema", use_safetensors=True
-)
-```
-
-</hfoption>
-</hfoptions>
-
-Use the `variant` parameter in the [`DiffusionPipeline.save_pretrained`] method to save a checkpoint as a different floating point type or as a non-EMA variant. You should try save a variant to the same folder as the original checkpoint, so you have the option of loading both from the same folder.
-
-<hfoptions id="save">
-<hfoption id="fp16">
-
-```python
-from diffusers import DiffusionPipeline
-
-pipeline.save_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", variant="fp16")
-```
-
-</hfoption>
-<hfoption id="non_ema">
-
-```py
-pipeline.save_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", variant="non_ema")
-```
-
-</hfoption>
-</hfoptions>
-
-If you don't save the variant to an existing folder, you must specify the `variant` argument otherwise it'll throw an `Exception` because it can't find the original checkpoint.
-
-```python
-# 👎 this won't work
-pipeline = DiffusionPipeline.from_pretrained(
-    "./stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True
-)
-# 👍 this works
-pipeline = DiffusionPipeline.from_pretrained(
-    "./stable-diffusion-v1-5", variant="fp16", torch_dtype=torch.float16, use_safetensors=True
-)
-```
-
-## DiffusionPipeline explained
-
-As a class method, [`DiffusionPipeline.from_pretrained`] is responsible for two things:
-
- Download the latest version of the folder structure required for inference and cache it. If the latest folder structure is available in the local cache, [`DiffusionPipeline.from_pretrained`] reuses the cache and won't redownload the files.
- Load the cached weights into the correct pipeline [class](../api/pipelines/overview#diffusers-summary) - retrieved from the `model_index.json` file - and return an instance of it.
-
-The pipelines' underlying folder structure corresponds directly with their class instances. For example, the [`StableDiffusionPipeline`] corresponds to the folder structure in [`stable-diffusion-v1-5/stable-diffusion-v1-5`](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5).
-
-```python
-from diffusers import DiffusionPipeline
-
-repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
-pipeline = DiffusionPipeline.from_pretrained(repo_id, use_safetensors=True)
-print(pipeline)
-```
-
-You'll see pipeline is an instance of [`StableDiffusionPipeline`], which consists of seven components:
-
- `"feature_extractor"`: a [`~transformers.CLIPImageProcessor`] from 🤗 Transformers.
- `"safety_checker"`: a [component](https://github.com/huggingface/diffusers/blob/e55687e1e15407f60f32242027b7bb8170e58266/src/diffusers/pipelines/stable_diffusion/safety_checker.py#L32) for screening against harmful content.
- `"scheduler"`: an instance of [`PNDMScheduler`].
- `"text_encoder"`: a [`~transformers.CLIPTextModel`] from 🤗 Transformers.
- `"tokenizer"`: a [`~transformers.CLIPTokenizer`] from 🤗 Transformers.
- `"unet"`: an instance of [`UNet2DConditionModel`].
- `"vae"`: an instance of [`AutoencoderKL`].
-
-```json
-StableDiffusionPipeline {
-  "feature_extractor": [
-    "transformers",
-    "CLIPImageProcessor"
-  ],
-  "safety_checker": [
-    "stable_diffusion",
-    "StableDiffusionSafetyChecker"
-  ],
-  "scheduler": [
-    "diffusers",
-    "PNDMScheduler"
-  ],
-  "text_encoder": [
-    "transformers",
-    "CLIPTextModel"
-  ],
-  "tokenizer": [
-    "transformers",
-    "CLIPTokenizer"
-  ],
-  "unet": [
-    "diffusers",
-    "UNet2DConditionModel"
-  ],
-  "vae": [
-    "diffusers",
-    "AutoencoderKL"
-  ]
-}
-```
-
-Compare the components of the pipeline instance to the [`stable-diffusion-v1-5/stable-diffusion-v1-5`](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/tree/main) folder structure, and you'll see there is a separate folder for each of the components in the repository:
-
-```
-.
-├── feature_extractor
-│   └── preprocessor_config.json
-├── model_index.json
-├── safety_checker
-│   ├── config.json
-|   ├── model.fp16.safetensors
-│   ├── model.safetensors
-│   ├── pytorch_model.bin
-|   └── pytorch_model.fp16.bin
-├── scheduler
-│   └── scheduler_config.json
-├── text_encoder
-│   ├── config.json
-|   ├── model.fp16.safetensors
-│   ├── model.safetensors
-│   |── pytorch_model.bin
-|   └── pytorch_model.fp16.bin
-├── tokenizer
-│   ├── merges.txt
-│   ├── special_tokens_map.json
-│   ├── tokenizer_config.json
-│   └── vocab.json
-├── unet
-│   ├── config.json
-│   ├── diffusion_pytorch_model.bin
-|   |── diffusion_pytorch_model.fp16.bin
-│   |── diffusion_pytorch_model.f16.safetensors
-│   |── diffusion_pytorch_model.non_ema.bin
-│   |── diffusion_pytorch_model.non_ema.safetensors
-│   └── diffusion_pytorch_model.safetensors
-|── vae
-.   ├── config.json
-.   ├── diffusion_pytorch_model.bin
-    ├── diffusion_pytorch_model.fp16.bin
-    ├── diffusion_pytorch_model.fp16.safetensors
-    └── diffusion_pytorch_model.safetensors
-```
-
-You can access each of the components of the pipeline as an attribute to view its configuration:
-
-```py
-pipeline.tokenizer
-CLIPTokenizer(
-    name_or_path="/root/.cache/huggingface/hub/models--runwayml--stable-diffusion-v1-5/snapshots/39593d5650112b4cc580433f6b0435385882d819/tokenizer",
-    vocab_size=49408,
-    model_max_length=77,
-    is_fast=False,
-    padding_side="right",
-    truncation_side="right",
-    special_tokens={
-        "bos_token": AddedToken("<|startoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=True),
-        "eos_token": AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=True),
-        "unk_token": AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=True),
-        "pad_token": "<|endoftext|>",
-    },
-    clean_up_tokenization_spaces=True
-)
-```
-
-Every pipeline expects a [`model_index.json`](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/model_index.json) file that tells the [`DiffusionPipeline`]:
-
- which pipeline class to load from `_class_name`
- which version of 🧨 Diffusers was used to create the model in `_diffusers_version`
- what components from which library are stored in the subfolders (`name` corresponds to the component and subfolder name, `library` corresponds to the name of the library to load the class from, and `class` corresponds to the class name)
-
-```json
-{
-  "_class_name": "StableDiffusionPipeline",
-  "_diffusers_version": "0.6.0",
-  "feature_extractor": [
-    "transformers",
-    "CLIPImageProcessor"
-  ],
-  "safety_checker": [
-    "stable_diffusion",
-    "StableDiffusionSafetyChecker"
-  ],
-  "scheduler": [
-    "diffusers",
-    "PNDMScheduler"
-  ],
-  "text_encoder": [
-    "transformers",
-    "CLIPTextModel"
-  ],
-  "tokenizer": [
-    "transformers",
-    "CLIPTokenizer"
-  ],
-  "unet": [
-    "diffusers",
-    "UNet2DConditionModel"
-  ],
-  "vae": [
-    "diffusers",
-    "AutoencoderKL"
-  ]
-}
-```
+```
@@ -0,0 +1,120 @@
+<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+[[open-in-colab]]
+
+# Models
+
+A diffusion model relies on a few individual models working together to generate an output. These models are responsible for denoising, encoding inputs, and decoding latents into the actual outputs.
+
+This guide will show you how to load models.
+
+## Loading a model
+
+All models are loaded with the [`~ModelMixin.from_pretrained`] method, which downloads and caches the latest model version. If the latest files are available in the local cache, [`~ModelMixin.from_pretrained`] reuses files in the cache.
+
+Pass the `subfolder` argument to [`~ModelMixin.from_pretrained`] to specify where to load the model weights from. Omit the `subfolder` argument if the repository doesn't have a subfolder structure or if you're loading a standalone model.
+
+```py
+from diffusers import QwenImageTransformer2DModel
+
+model = QwenImageTransformer2DModel.from_pretrained("Qwen/Qwen-Image", subfolder="transformer")
+```
+
+## AutoModel
+
+[`AutoModel`] detects the model class from a `model_index.json` file or a model's `config.json` file. It fetches the correct model class from these files and delegates the actual loading to the model class. [`AutoModel`] is useful for automatic model type detection without needing to know the exact model class beforehand.
+
+```py
+from diffusers import AutoModel
+
+model = AutoModel.from_pretrained(
+    "Qwen/Qwen-Image", subfolder="transformer"
+)
+```
+
+## Model data types
+
+Use the `torch_dtype` argument in [`~ModelMixin.from_pretrained`] to load a model with a specific data type. This allows you to load a model in a lower precision to reduce memory usage.
+
+```py
+import torch
+from diffusers import QwenImageTransformer2DModel
+
+model = QwenImageTransformer2DModel.from_pretrained(
+    "Qwen/Qwen-Image",
+    subfolder="transformer",
+    torch_dtype=torch.bfloat16
+)
+```
+
+[nn.Module.to](https://docs.pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.to) can also convert to a specific data type on the fly. However, it converts *all* weights to the requested data type unlike `torch_dtype` which respects `_keep_in_fp32_modules`. This argument preserves layers in `torch.float32` for numerical stability and best generation quality (see example [_keep_in_fp32_modules](https://github.com/huggingface/diffusers/blob/f864a9a352fa4a220d860bfdd1782e3e5af96382/src/diffusers/models/transformers/transformer_wan.py#L374)).
+
+```py
+from diffusers import QwenImageTransformer2DModel
+
+model = QwenImageTransformer2DModel.from_pretrained(
+    "Qwen/Qwen-Image", subfolder="transformer"
+)
+model = model.to(dtype=torch.float16) 
+```
+
+## Device placement
+
+Use the `device_map` argument in [`~ModelMixin.from_pretrained`] to place a model on an accelerator like a GPU. It is especially helpful when there are multiple GPUs.
+
+Diffusers currently provides three options to `device_map` for individual models, `"cuda"`, `"balanced"` and `"auto"`. Refer to the table below to compare the three placement strategies.
+
+| parameter | description |
+|---|---|
+| `"cuda"` | places the model on a supported accelerator (CUDA) |
+| `"balanced"` | evenly distributes the model across all GPUs |
+| `"auto"` | distributes the model from the fastest device first to the slowest |
+
+Use the `max_memory` argument in [`~ModelMixin.from_pretrained`] to allocate a maximum amount of memory to use on each device. By default, Diffusers uses the maximum amount available.
+
+```py
+import torch
+from diffusers import QwenImagePipeline
+
+max_memory = {0: "16GB", 1: "16GB"}
+pipeline = QwenImagePipeline.from_pretrained(
+    "Qwen/Qwen-Image", 
+    torch_dtype=torch.bfloat16,
+    device_map="cuda",
+    max_memory=max_memory
+)
+```
+
+The `hf_device_map` attribute allows you to access and view the `device_map`.
+
+```py
+print(transformer.hf_device_map)
+# {'': device(type='cuda')}
+```
+
+## Saving models
+
+Save a model with the [`~ModelMixin.save_pretrained`] method.
+
+```py
+from diffusers import QwenImageTransformer2DModel
+
+model = QwenImageTransformer2DModel.from_pretrained("Qwen/Qwen-Image", subfolder="transformer")
+model.save_pretrained("./local/model")
+```
+
+For large models, it is helpful to use `max_shard_size` to save a model as multiple shards. Shards can be loaded faster and save memory (refer to the [parallel loading](./loading#parallel-loading) docs for more details), especially if there is more than one GPU.
+
+```py
+model.save_pretrained("./local/model", max_shard_size="5GB")
+```
@@ -176,7 +176,7 @@ Benefits of using the Diffusers-multifolder layout include:
    ).to("cuda")
    turbo_pipeline.scheduler = EulerDiscreteScheduler.from_config(
        turbo_pipeline.scheduler.config,
-        timestep+spacing="trailing"
+        timestep_spacing="trailing"
    )
    image = turbo_pipeline(
        "an astronaut riding a unicorn on mars",
@@ -267,6 +267,7 @@ pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_d
 save_folder = "flux-dev"
 pipe.save_pretrained("flux-dev")
 export_folder_as_dduf("flux-dev.dduf", folder_path=save_folder)
+```

 > [!TIP]
 > Packaging and loading quantized checkpoints in the DDUF format is supported as long as they respect the multi-folder structure.
@@ -10,19 +10,22 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
 specific language governing permissions and limitations under the License.
 -->

-# Push files to the Hub
-
 [[open-in-colab]]

-🤗 Diffusers provides a [`~diffusers.utils.PushToHubMixin`] for uploading your model, scheduler, or pipeline to the Hub. It is an easy way to store your files on the Hub, and also allows you to share your work with others. Under the hood, the [`~diffusers.utils.PushToHubMixin`]:
+# Sharing pipelines and models
+
+Share your pipelines, models, and schedulers on the Hub with the [`~diffusers.utils.PushToHubMixin`] class. This class:

 1. creates a repository on the Hub
 2. saves your model, scheduler, or pipeline files so they can be reloaded later
 3. uploads folder containing these files to the Hub

-This guide will show you how to use the [`~diffusers.utils.PushToHubMixin`] to upload your files to the Hub.
+This guide will show you how to upload your files to the Hub with the [`~diffusers.utils.PushToHubMixin`] class.

-You'll need to log in to your Hub account with your access [token](https://huggingface.co/settings/tokens) first:
+Log in to your Hugging Face account with your access [token](https://huggingface.co/settings/tokens).
+
+<hfoptions id="login">
+<hfoption id="notebook">

 ```py
 from huggingface_hub import notebook_login
@@ -30,9 +33,19 @@ from huggingface_hub import notebook_login
 notebook_login()
 ```

+</hfoption>
+<hfoption id="hf CLI">
+
+```bash
+hf auth login
+```
+
+</hfoption>
+</hfoptions>
+
 ## Models

-To push a model to the Hub, call [`~diffusers.utils.PushToHubMixin.push_to_hub`] and specify the repository id of the model to be stored on the Hub:
+To push a model to the Hub, call [`~diffusers.utils.PushToHubMixin.push_to_hub`] and specify the repository id of the model.

 ```py
 from diffusers import ControlNetModel
@@ -48,15 +61,9 @@ controlnet = ControlNetModel(
 controlnet.push_to_hub("my-controlnet-model")
 ```

-For models, you can also specify the [*variant*](loading#checkpoint-variants) of the weights to push to the Hub. For example, to push `fp16` weights:
+The [`~diffusers.utils.PushToHubMixin.push_to_hub`] method saves the model's `config.json` file and the weights are automatically saved as safetensors files.

-```py
-controlnet.push_to_hub("my-controlnet-model", variant="fp16")
-```
-
-The [`~diffusers.utils.PushToHubMixin.push_to_hub`] function saves the model's `config.json` file and the weights are automatically saved in the `safetensors` format.
-
-Now you can reload the model from your repository on the Hub:
+Load the model again with [`~ModelMixin.from_pretrained`].

 ```py
 model = ControlNetModel.from_pretrained("your-namespace/my-controlnet-model")
@@ -64,7 +71,7 @@ model = ControlNetModel.from_pretrained("your-namespace/my-controlnet-model")

 ## Scheduler

-To push a scheduler to the Hub, call [`~diffusers.utils.PushToHubMixin.push_to_hub`] and specify the repository id of the scheduler to be stored on the Hub:
+To push a scheduler to the Hub, call [`~diffusers.utils.PushToHubMixin.push_to_hub`] and specify the repository id of the scheduler.

 ```py
 from diffusers import DDIMScheduler
@@ -81,7 +88,7 @@ scheduler.push_to_hub("my-controlnet-scheduler")

 The [`~diffusers.utils.PushToHubMixin.push_to_hub`] function saves the scheduler's `scheduler_config.json` file to the specified repository.

-Now you can reload the scheduler from your repository on the Hub:
+Load the scheduler again with [`~SchedulerMixin.from_pretrained`].

 ```py
 scheduler = DDIMScheduler.from_pretrained("your-namespace/my-controlnet-scheduler")
@@ -89,7 +96,7 @@ scheduler = DDIMScheduler.from_pretrained("your-namepsace/my-controlnet-schedule

 ## Pipeline

-You can also push an entire pipeline with all it's components to the Hub. For example, initialize the components of a [`StableDiffusionPipeline`] with the parameters you want:
+To push a pipeline to the Hub, initialize the pipeline components with your desired parameters.

 ```py
 from diffusers import (
@@ -143,7 +150,7 @@ text_encoder = CLIPTextModel(text_encoder_config)
 tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
 ```

-Pass all of the components to the [`StableDiffusionPipeline`] and call [`~diffusers.utils.PushToHubMixin.push_to_hub`] to push the pipeline to the Hub:
+Pass all components to the pipeline and call [`~diffusers.utils.PushToHubMixin.push_to_hub`].

 ```py
 components = {
@@ -160,7 +167,7 @@ pipeline = StableDiffusionPipeline(**components)
 pipeline.push_to_hub("my-pipeline")
 ```

-The [`~diffusers.utils.PushToHubMixin.push_to_hub`] function saves each component to a subfolder in the repository. Now you can reload the pipeline from your repository on the Hub:
+The [`~diffusers.utils.PushToHubMixin.push_to_hub`] method saves each component to a subfolder in the repository. Load the pipeline again with [`~DiffusionPipeline.from_pretrained`].

 ```py
 pipeline = StableDiffusionPipeline.from_pretrained("your-namespace/my-pipeline")
@@ -168,10 +175,10 @@ pipeline = StableDiffusionPipeline.from_pretrained("your-namespace/my-pipeline")

 ## Privacy

-Set `private=True` in the [`~diffusers.utils.PushToHubMixin.push_to_hub`] function to keep your model, scheduler, or pipeline files private:
+Set `private=True` in [`~diffusers.utils.PushToHubMixin.push_to_hub`] to keep your model, scheduler, or pipeline files private.

 ```py
 controlnet.push_to_hub("my-controlnet-model-private", private=True)
 ```

-Private repositories are only visible to you, and other users won't be able to clone the repository and your repository won't appear in search results. Even if a user has the URL to your private repository, they'll receive a `404 - Sorry, we can't find the page you are looking for`. You must be [logged in](https://huggingface.co/docs/huggingface_hub/quick-start#login) to load a model from a private repository.
+Private repositories are only visible to you. Other users won't be able to clone the repository and it won't appear in search results. Even if a user has the URL to your private repository, they'll receive a `404 - Sorry, we can't find the page you are looking for`. You must be [logged in](https://huggingface.co/docs/huggingface_hub/quick-start#login) to load a model from a private repository.
@@ -10,129 +10,86 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
 specific language governing permissions and limitations under the License.
 -->

-# Reproducible pipelines
+# Reproducibility

-Diffusion models are inherently random which is what allows it to generate different outputs every time it is run. But there are certain times when you want to generate the same output every time, like when you're testing, replicating results, and even [improving image quality](#deterministic-batch-generation). While you can't expect to get identical results across platforms, you can expect reproducible results across releases and platforms within a certain tolerance range (though even this may vary).
+Diffusion is a random process that generates a different output every time. For certain situations like testing and replicating results, you want to generate the same result each time, across releases and platforms within a certain tolerance range.

-This guide will show you how to control randomness for deterministic generation on a CPU and GPU.
+This guide will show you how to control sources of randomness and enable deterministic algorithms.
+
+## Generator
+
+Pipelines rely on [torch.randn](https://pytorch.org/docs/stable/generated/torch.randn.html), which uses a different random seed each time, to create the initial noisy tensors. To generate the same output on a CPU or GPU, use a [Generator](https://docs.pytorch.org/docs/stable/generated/torch.Generator.html) to manage how random values are generated.

 > [!TIP]
-> We strongly recommend reading PyTorch's [statement about reproducibility](https://pytorch.org/docs/stable/notes/randomness.html):
->
-> "Completely reproducible results are not guaranteed across PyTorch releases, individual commits, or different platforms. Furthermore, results may not be reproducible between CPU and GPU executions, even when using identical seeds."
+> If reproducibility is important to your use case, we recommend always using a CPU `Generator`. The performance loss is often negligible and you'll generate more similar values.

-## Control randomness
+<hfoptions id="generator">
+<hfoption id="GPU">

-During inference, pipelines rely heavily on random sampling operations which include creating the
-Gaussian noise tensors to denoise and adding noise to the scheduling step.
+The GPU uses a different random number generator than the CPU. Diffusers solves this issue with the [`~utils.torch_utils.randn_tensor`] function, which creates the random tensor on a CPU and then moves it to the GPU. This function is used everywhere inside the pipeline and you don't need to explicitly call it.

-Take a look at the tensor values in the [`DDIMPipeline`] after two inference steps.
+Use [manual_seed](https://docs.pytorch.org/docs/stable/generated/torch.manual_seed.html) as shown below to set a seed.

-```python
-from diffusers import DDIMPipeline
-import numpy as np
-
-ddim = DDIMPipeline.from_pretrained( "google/ddpm-cifar10-32", use_safetensors=True)
-image = ddim(num_inference_steps=2, output_type="np").images
-print(np.abs(image).sum())
-```
-
-Running the code above prints one value, but if you run it again you get a different value.
-
-Each time the pipeline is run, [torch.randn](https://pytorch.org/docs/stable/generated/torch.randn.html) uses a different random seed to create the Gaussian noise tensors. This leads to a different result each time it is run and enables the diffusion pipeline to generate a different random image each time.
-
-But if you need to reliably generate the same image, that depends on whether you're running the pipeline on a CPU or GPU.
-
-> [!TIP]
-> It might seem unintuitive to pass `Generator` objects to a pipeline instead of the integer value representing the seed. However, this is the recommended design when working with probabilistic models in PyTorch because a `Generator` is a *random state* that can be passed to multiple pipelines in a sequence. As soon as the `Generator` is consumed, the *state* is changed in place which means even if you passed the same `Generator` to a different pipeline, it won't produce the same result because the state is already changed.
-
-<hfoptions id="hardware">
-<hfoption id="CPU">
-
-To generate reproducible results on a CPU, you'll need to use a PyTorch [Generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) and set a seed. Now when you run the code, it always prints a value of `1491.1711` because the `Generator` object with the seed is passed to all the random functions in the pipeline. You should get a similar, if not the same, result on whatever hardware and PyTorch version you're using.
-
-```python
+```py
 import torch
 import numpy as np
 from diffusers import DDIMPipeline

-ddim = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32", use_safetensors=True)
+ddim = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32", device_map="cuda")
+generator = torch.manual_seed(0)
+image = ddim(num_inference_steps=2, output_type="np", generator=generator).images
+print(np.abs(image).sum())
+```
+
+</hfoption>
+<hfoption id="CPU">
+
+Set `device="cpu"` in the `Generator` and use [manual_seed](https://docs.pytorch.org/docs/stable/generated/torch.manual_seed.html) to set a seed for generating random numbers.
+
+```py
+import torch
+import numpy as np
+from diffusers import DDIMPipeline
+
+ddim = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32")
 generator = torch.Generator(device="cpu").manual_seed(0)
 image = ddim(num_inference_steps=2, output_type="np", generator=generator).images
 print(np.abs(image).sum())
 ```

-</hfoption>
-<hfoption id="GPU">
-
-Writing a reproducible pipeline on a GPU is a bit trickier, and full reproducibility across different hardware is not guaranteed because matrix multiplication - which diffusion pipelines require a lot of - is less deterministic on a GPU than a CPU. For example, if you run the same code example from the CPU example, you'll get a different result even though the seed is identical. This is because the GPU uses a different random number generator than the CPU.
-
-```python
-import torch
-import numpy as np
-from diffusers import DDIMPipeline
-
-ddim = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32", use_safetensors=True)
-ddim.to("cuda")
-generator = torch.Generator(device="cuda").manual_seed(0)
-image = ddim(num_inference_steps=2, output_type="np", generator=generator).images
-print(np.abs(image).sum())
-```
-
-To avoid this issue, Diffusers has a [`~utils.torch_utils.randn_tensor`] function for creating random noise on the CPU, and then moving the tensor to a GPU if necessary. The [`~utils.torch_utils.randn_tensor`] function is used everywhere inside the pipeline. Now you can call [torch.manual_seed](https://pytorch.org/docs/stable/generated/torch.manual_seed.html) which automatically creates a CPU `Generator` that can be passed to the pipeline even if it is being run on a GPU.
-
-```python
-import torch
-import numpy as np
-from diffusers import DDIMPipeline
-
-ddim = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32", use_safetensors=True)
-ddim.to("cuda")
-generator = torch.manual_seed(0)
-image = ddim(num_inference_steps=2, output_type="np", generator=generator).images
-print(np.abs(image).sum())
-```
-
-> [!TIP]
-> If reproducibility is important to your use case, we recommend always passing a CPU `Generator`. The performance loss is often negligible and you'll generate more similar values than if the pipeline had been run on a GPU.
-
-Finally, more complex pipelines such as [`UnCLIPPipeline`], are often extremely
-susceptible to precision error propagation. You'll need to use
-exactly the same hardware and PyTorch version for full reproducibility.
-
 </hfoption>
 </hfoptions>

+The `Generator` object should be passed to the pipeline instead of an integer seed. `Generator` maintains a *random state* that is consumed and modified when used. Once consumed, the same `Generator` object produces different results in subsequent calls, even across different pipelines, because its *state* has changed.
+
+```diff
+generator = torch.manual_seed(0)
+
+for _ in range(5):
+-    image = pipeline(prompt, generator=generator)
++    image = pipeline(prompt, generator=torch.manual_seed(0))
+```
+
 ## Deterministic algorithms

-You can also configure PyTorch to use deterministic algorithms to create a reproducible pipeline. The downside is that deterministic algorithms may be slower than non-deterministic ones and you may observe a decrease in performance.
+PyTorch supports [deterministic algorithms](https://docs.pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms) - where available - for certain operations so they produce the same results. Deterministic algorithms may be slower and decrease performance.

-Non-deterministic behavior occurs when operations are launched in more than one CUDA stream. To avoid this, set the environment variable [CUBLAS_WORKSPACE_CONFIG](https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility) to `:16:8` to only use one buffer size during runtime.
-
-PyTorch typically benchmarks multiple algorithms to select the fastest one, but if you want reproducibility, you should disable this feature because the benchmark may select different algorithms each time. Set Diffusers [enable_full_determinism](https://github.com/huggingface/diffusers/blob/142f353e1c638ff1d20bd798402b68f72c1ebbdd/src/diffusers/utils/testing_utils.py#L861) to enable deterministic algorithms.
-
-```py
-enable_full_determinism()
-```
-
-Now when you run the same pipeline twice, you'll get identical results.
+Use Diffusers' [enable_full_determinism](https://github.com/huggingface/diffusers/blob/142f353e1c638ff1d20bd798402b68f72c1ebbdd/src/diffusers/utils/testing_utils.py#L861) function to enable deterministic algorithms.

 ```py
 import torch
-from diffusers import DDIMScheduler, StableDiffusionPipeline
+from diffusers.utils.testing_utils import enable_full_determinism

-pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", use_safetensors=True).to("cuda")
-pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
-g = torch.Generator(device="cuda")
-
-prompt = "A bear is playing a guitar on Times Square"
-
-g.manual_seed(0)
-result1 = pipe(prompt=prompt, num_inference_steps=50, generator=g, output_type="latent").images
-
-g.manual_seed(0)
-result2 = pipe(prompt=prompt, num_inference_steps=50, generator=g, output_type="latent").images
-
-print("L_inf dist =", abs(result1 - result2).max())
-"L_inf dist = tensor(0., device='cuda:0')"
+enable_full_determinism()
 ```
+
+Under the hood, `enable_full_determinism` works by:
+
+- Setting the environment variable [CUBLAS_WORKSPACE_CONFIG](https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility) to `:16:8` to only use one buffer size during runtime. Non-deterministic behavior occurs when operations are used in more than one CUDA stream.
+- Disabling cuDNN benchmarking, which searches for the fastest convolution algorithm, by setting `torch.backends.cudnn.benchmark=False`. Non-deterministic behavior occurs because the benchmark may select different algorithms each time depending on hardware or benchmarking noise.
+- Disabling TensorFloat32 (TF32) operations in favor of more precise and consistent full-precision operations.
+
+
+## Resources
+
+We strongly recommend reading PyTorch's developer notes about [Reproducibility](https://docs.pytorch.org/docs/stable/notes/randomness.html). You can try to limit randomness, but reproducibility is not *guaranteed* even with an identical seed.
@@ -165,53 +165,6 @@ image

 Most images look very similar and are comparable in quality. Again, it often comes down to your specific use case so a good approach is to run multiple different schedulers and compare the results.

-### Flax schedulers
-
-To compare Flax schedulers, you need to additionally load the scheduler state into the model parameters. For example, let's change the default scheduler in [`FlaxStableDiffusionPipeline`] to use the super fast [`FlaxDPMSolverMultistepScheduler`].
-
-> [!WARNING]
-> The [`FlaxLMSDiscreteScheduler`] and [`FlaxDDPMScheduler`] are not compatible with the [`FlaxStableDiffusionPipeline`] yet.
-
-```py
-import jax
-import numpy as np
-from flax.jax_utils import replicate
-from flax.training.common_utils import shard
-from diffusers import FlaxStableDiffusionPipeline, FlaxDPMSolverMultistepScheduler
-
-scheduler, scheduler_state = FlaxDPMSolverMultistepScheduler.from_pretrained(
-    "stable-diffusion-v1-5/stable-diffusion-v1-5",
-    subfolder="scheduler"
-)
-pipeline, params = FlaxStableDiffusionPipeline.from_pretrained(
-    "stable-diffusion-v1-5/stable-diffusion-v1-5",
-    scheduler=scheduler,
-    variant="bf16",
-    dtype=jax.numpy.bfloat16,
-)
-params["scheduler"] = scheduler_state
-```
-
-Then you can take advantage of Flax's compatibility with TPUs to generate a number of images in parallel. You'll need to make a copy of the model parameters for each available device and then split the inputs across them to generate your desired number of images.
-
-```py
-# Generate 1 image per parallel device (8 on TPUv2-8 or TPUv3-8)
-prompt = "A photograph of an astronaut riding a horse on Mars, high resolution, high definition."
-num_samples = jax.device_count()
-prompt_ids = pipeline.prepare_inputs([prompt] * num_samples)
-
-prng_seed = jax.random.PRNGKey(0)
-num_inference_steps = 25
-
-# shard inputs and rng
-params = replicate(params)
-prng_seed = jax.random.split(prng_seed, jax.device_count())
-prompt_ids = shard(prompt_ids)
-
-images = pipeline(prompt_ids, params, prng_seed, num_inference_steps, jit=True).images
-images = pipeline.numpy_to_pil(np.asarray(images.reshape((num_samples,) + images.shape[-3:])))
-```
-
 ## Models

 Models are loaded from the [`ModelMixin.from_pretrained`] method, which downloads and caches the latest version of the model weights and configurations. If the latest files are available in the local cache, [`~ModelMixin.from_pretrained`] reuses files in the cache instead of re-downloading them.
@@ -1,225 +0,0 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-->
-
-# JAX/Flax
-
-[[open-in-colab]]
-
-🤗 Diffusers supports Flax for super fast inference on Google TPUs, such as those available in Colab, Kaggle or Google Cloud Platform. This guide shows you how to run inference with Stable Diffusion using JAX/Flax.
-
-Before you begin, make sure you have the necessary libraries installed:
-
-```py
-# uncomment to install the necessary libraries in Colab
-#!pip install -q jax==0.3.25 jaxlib==0.3.25 flax transformers ftfy
-#!pip install -q diffusers
-```
-
-You should also make sure you're using a TPU backend. While JAX does not run exclusively on TPUs, you'll get the best performance on a TPU because each server has 8 TPU accelerators working in parallel.
-
-If you are running this guide in Colab, select *Runtime* in the menu above, select the option *Change runtime type*, and then select *TPU* under the *Hardware accelerator* setting. Import JAX and quickly check whether you're using a TPU:
-
-```python
-import jax
-import jax.tools.colab_tpu
-jax.tools.colab_tpu.setup_tpu()
-
-num_devices = jax.device_count()
-device_type = jax.devices()[0].device_kind
-
-print(f"Found {num_devices} JAX devices of type {device_type}.")
-assert (
-    "TPU" in device_type,
-    "Available device is not a TPU, please select TPU from Runtime > Change runtime type > Hardware accelerator"
-)
-# Found 8 JAX devices of type Cloud TPU.
-```
-
-Great, now you can import the rest of the dependencies you'll need:
-
-```python
-import jax.numpy as jnp
-from jax import pmap
-from flax.jax_utils import replicate
-from flax.training.common_utils import shard
-
-from diffusers import FlaxStableDiffusionPipeline
-```
-
-## Load a model
-
-Flax is a functional framework, so models are stateless and parameters are stored outside of them. Loading a pretrained Flax pipeline returns *both* the pipeline and the model weights (or parameters). In this guide, you'll use `bfloat16`, a more efficient half-float type that is supported by TPUs (you can also use `float32` for full precision if you want).
-
-```python
-dtype = jnp.bfloat16
-pipeline, params = FlaxStableDiffusionPipeline.from_pretrained(
-    "CompVis/stable-diffusion-v1-4",
-    variant="bf16",
-    dtype=dtype,
-)
-```
-
-## Inference
-
-TPUs usually have 8 devices working in parallel, so let's use the same prompt for each device. This means you can perform inference on 8 devices at once, with each device generating one image. As a result, you'll get 8 images in the same amount of time it takes for one chip to generate a single image!
-
-<Tip>
-
-Learn more details in the [How does parallelization work?](#how-does-parallelization-work) section.
-
-</Tip>
-
-After replicating the prompt, get the tokenized text ids by calling the `prepare_inputs` function on the pipeline. The length of the tokenized text is set to 77 tokens as required by the configuration of the underlying CLIP text model.
-
-```python
-prompt = "A cinematic film still of Morgan Freeman starring as Jimi Hendrix, portrait, 40mm lens, shallow depth of field, close up, split lighting, cinematic"
-prompt = [prompt] * jax.device_count()
-prompt_ids = pipeline.prepare_inputs(prompt)
-prompt_ids.shape
-# (8, 77)
-```
-
-Model parameters and inputs have to be replicated across the 8 parallel devices. The parameters dictionary is replicated with [`flax.jax_utils.replicate`](https://flax.readthedocs.io/en/latest/api_reference/flax.jax_utils.html#flax.jax_utils.replicate) which traverses the dictionary and changes the shape of the weights so they are repeated 8 times. Arrays are replicated using `shard`.
-
-```python
-# parameters
-p_params = replicate(params)
-
-# arrays
-prompt_ids = shard(prompt_ids)
-prompt_ids.shape
-# (8, 1, 77)
-```
-
-This shape means each one of the 8 devices receives as an input a `jnp` array with shape `(1, 77)`, where `1` is the batch size per device. On TPUs with sufficient memory, you could have a batch size larger than `1` if you want to generate multiple images (per chip) at once.
-
-Next, create a random number generator to pass to the generation function. This is standard procedure in Flax, which is very serious and opinionated about random numbers. All functions that deal with random numbers are expected to receive a generator to ensure reproducibility, even when you're training across multiple distributed devices.
-
-The helper function below uses a seed to initialize a random number generator. As long as you use the same seed, you'll get the exact same results. Feel free to use different seeds when exploring results later in the guide.
-
-```python
-def create_key(seed=0):
-    return jax.random.PRNGKey(seed)
-```
-
-The helper function, or `rng`, is split 8 times so each device receives a different generator and generates a different image.
-
-```python
-rng = create_key(0)
-rng = jax.random.split(rng, jax.device_count())
-```
-
-To take advantage of JAX's optimized speed on a TPU, pass `jit=True` to the pipeline to compile the JAX code into an efficient representation and to ensure the model runs in parallel across the 8 devices.
-
-<Tip warning={true}>
-
-You need to ensure all your inputs have the same shape in subsequent calls, otherwise JAX will need to recompile the code which is slower.
-
-</Tip>
-
-The first inference run takes more time because it needs to compile the code, but subsequent calls (even with different inputs) are much faster. For example, it took more than a minute to compile on a TPU v2-8, but then it takes about **7s** on a future inference run!
-
-```py
-%%time
-images = pipeline(prompt_ids, p_params, rng, jit=True)[0]
-
-# CPU times: user 56.2 s, sys: 42.5 s, total: 1min 38s
-# Wall time: 1min 29s
-```
-
-The returned array has shape `(8, 1, 512, 512, 3)` which should be reshaped to remove the second dimension and get 8 images of `512 × 512 × 3`. Then you can use the [`~utils.numpy_to_pil`] function to convert the arrays into images.
-
-```python
-from diffusers.utils import make_image_grid
-
-images = images.reshape((images.shape[0] * images.shape[1],) + images.shape[-3:])
-images = pipeline.numpy_to_pil(images)
-make_image_grid(images, rows=2, cols=4)
-```
-
-![img](https://huggingface.co/datasets/YiYiXu/test-doc-assets/resolve/main/stable_diffusion_jax_how_to_cell_38_output_0.jpeg)
-
-## Using different prompts
-
-You don't necessarily have to use the same prompt on all devices. For example, to generate 8 different prompts:
-
-```python
-prompts = [
-    "Labrador in the style of Hokusai",
-    "Painting of a squirrel skating in New York",
-    "HAL-9000 in the style of Van Gogh",
-    "Times Square under water, with fish and a dolphin swimming around",
-    "Ancient Roman fresco showing a man working on his laptop",
-    "Close-up photograph of young black woman against urban background, high quality, bokeh",
-    "Armchair in the shape of an avocado",
-    "Clown astronaut in space, with Earth in the background",
-]
-
-prompt_ids = pipeline.prepare_inputs(prompts)
-prompt_ids = shard(prompt_ids)
-
-images = pipeline(prompt_ids, p_params, rng, jit=True).images
-images = images.reshape((images.shape[0] * images.shape[1],) + images.shape[-3:])
-images = pipeline.numpy_to_pil(images)
-
-make_image_grid(images, 2, 4)
-```
-
-![img](https://huggingface.co/datasets/YiYiXu/test-doc-assets/resolve/main/stable_diffusion_jax_how_to_cell_43_output_0.jpeg)
-
-## How does parallelization work?
-
-The Flax pipeline in 🤗 Diffusers automatically compiles the model and runs it in parallel on all available devices. Let's take a closer look at how that process works.
-
-JAX parallelization can be done in multiple ways. The easiest one revolves around using the [`jax.pmap`](https://jax.readthedocs.io/en/latest/_autosummary/jax.pmap.html) function to achieve single-program multiple-data (SPMD) parallelization. It means running several copies of the same code, each on different data inputs. More sophisticated approaches are possible, and you can go over to the JAX [documentation](https://jax.readthedocs.io/en/latest/index.html) to explore this topic in more detail if you are interested!
-
-`jax.pmap` does two things:
-
-1. Compiles (or "`jit`s") the code which is similar to `jax.jit()`. This does not happen when you call `pmap`, and only the first time the `pmap`ped function is called.
-2. Ensures the compiled code runs in parallel on all available devices.
-
-To demonstrate, call `pmap` on the pipeline's `_generate` method (this is a private method that generates images and may be renamed or removed in future releases of 🤗 Diffusers):
-
-```python
-p_generate = pmap(pipeline._generate)
-```
-
-After calling `pmap`, the prepared function `p_generate` will:
-
-1. Make a copy of the underlying function, `pipeline._generate`, on each device.
-2. Send each device a different portion of the input arguments (this is why it's necessary to call the *shard* function). In this case, `prompt_ids` has shape `(8, 1, 77, 768)` so the array is split into 8 and each copy of `_generate` receives an input with shape `(1, 77, 768)`.
-
-The most important thing to pay attention to here is the batch size (1 in this example), and the input dimensions that make sense for your code. You don't have to change anything else to make the code work in parallel.
-
-The first time you call the pipeline takes more time, but the calls afterward are much faster. The `block_until_ready` function is used to correctly measure inference time because JAX uses asynchronous dispatch and returns control to the Python loop as soon as it can. You don't need to use that in your code; blocking occurs automatically when you want to use the result of a computation that has not yet been materialized.
-
-```py
-%%time
-images = p_generate(prompt_ids, p_params, rng)
-images = images.block_until_ready()
-
-# CPU times: user 1min 15s, sys: 18.2 s, total: 1min 34s
-# Wall time: 1min 15s
-```
-
-Check your image dimensions to see if they're correct:
-
-```python
-images.shape
-# (8, 1, 512, 512, 3)
-```
-
-## Resources
-
-To learn more about how JAX works with Stable Diffusion, you may be interested in reading:
-
-* [Accelerating Stable Diffusion XL Inference with JAX on Cloud TPU v5e](https://hf.co/blog/sdxl_jax)
@@ -98,7 +98,7 @@ pipeline_quant_config = PipelineQuantizationConfig(
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.bfloat16
    },
-  components_to_quantize=["transformer"]
+  components_to_quantize="transformer"
 )

 pipeline = HunyuanVideoPipeline.from_pretrained(
@@ -287,7 +287,7 @@ export_to_video(output, "output.mp4", fps=16)

 ## Reduce memory usage

-Recent video models like [`HunyuanVideoPipeline`] and [`WanPipeline`], which have 10B+ parameters, require a lot of memory and it often exceeds the memory availabe on consumer hardware. Diffusers offers several techniques for reducing the memory requirements of these large models.
+Recent video models like [`HunyuanVideoPipeline`] and [`WanPipeline`], which have 10B+ parameters, require a lot of memory and it often exceeds the memory available on consumer hardware. Diffusers offers several techniques for reducing the memory requirements of these large models.

 > [!TIP]
 > Refer to the [Reduce memory usage](../optimization/memory) guide for more details about other memory saving techniques.
@@ -223,7 +223,7 @@ from diffusers.image_processor import VaeImageProcessor
 import torch 

 vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=torch.bfloat16).to("cuda")
-vae_scale_factor = 2 ** (len(vae.config.block_out_channels))
+vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)
 image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)

 with torch.no_grad():
@@ -1399,6 +1399,7 @@ def main(args):
                torch_dtype = torch.float16
            elif args.prior_generation_precision == "bf16":
                torch_dtype = torch.bfloat16
+
            pipeline = FluxPipeline.from_pretrained(
                args.pretrained_model_name_or_path,
                torch_dtype=torch_dtype,
@@ -1419,7 +1420,8 @@ def main(args):
            for example in tqdm(
                sample_dataloader, desc="Generating class images", disable=not accelerator.is_local_main_process
            ):
-                images = pipeline(example["prompt"]).images
+                with torch.autocast(device_type=accelerator.device.type, dtype=torch_dtype):
+                    images = pipeline(prompt=example["prompt"]).images

                for i, image in enumerate(images):
                    hash_image = insecure_hashlib.sha1(image.tobytes()).hexdigest()
@@ -88,6 +88,8 @@ PIXART-α Controlnet pipeline | Implementation of the controlnet model for pixar
 | FaithDiff Stable Diffusion XL Pipeline | Implementation of [(CVPR 2025) FaithDiff: Unleashing Diffusion Priors for Faithful Image Super-resolutionUnleashing Diffusion Priors for Faithful Image Super-resolution](https://huggingface.co/papers/2411.18824) - FaithDiff is a faithful image super-resolution method that leverages latent diffusion models by actively adapting the diffusion prior and jointly fine-tuning its components (encoder and diffusion model) with an alignment module to ensure high fidelity and structural consistency. | [FaithDiff Stable Diffusion XL Pipeline](#faithdiff-stable-diffusion-xl-pipeline) | [![Hugging Face Models](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Models-blue)](https://huggingface.co/jychen9811/FaithDiff) | [Junyang Chen, Jinshan Pan, Jiangxin Dong, IMAG Lab, (Adapted by Eliseu Silva)](https://github.com/JyChen9811/FaithDiff) |
 | Stable Diffusion 3 InstructPix2Pix Pipeline | Implementation of Stable Diffusion 3 InstructPix2Pix Pipeline | [Stable Diffusion 3 InstructPix2Pix Pipeline](#stable-diffusion-3-instructpix2pix-pipeline) | [![Hugging Face Models](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Models-blue)](https://huggingface.co/BleachNick/SD3_UltraEdit_freeform) [![Hugging Face Models](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Models-blue)](https://huggingface.co/CaptainZZZ/sd3-instructpix2pix) | [Jiayu Zhang](https://github.com/xduzhangjiayu) and [Haozhe Zhao](https://github.com/HaozheZhao)|
 | Flux Kontext multiple images | A modified version of the `FluxKontextPipeline` that supports calling Flux Kontext with multiple reference images.| [Flux Kontext multiple input Pipeline](#flux-kontext-multiple-images) | - |  [Net-Mist](https://github.com/Net-Mist) |
+
+
 To load a custom pipeline you just need to pass the `custom_pipeline` argument to `DiffusionPipeline`, as one of the files in `diffusers/examples/community`. Feel free to send a PR with your own pipelines, we will merge them quickly.

 ```py
@@ -398,7 +398,7 @@ class ComposableStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin)
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -147,7 +147,7 @@ class ImagicStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `nd.array`.
@@ -197,7 +197,7 @@ class ImageToImageInpaintingPipeline(DiffusionPipeline):
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -173,7 +173,7 @@ class StableDiffusionWalkPipeline(DiffusionPipeline, StableDiffusionMixin):
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -888,7 +888,7 @@ class StableDiffusionLongPromptWeightingPipeline(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -1131,7 +1131,7 @@ class StableDiffusionLongPromptWeightingPipeline(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -721,7 +721,7 @@ class OnnxStableDiffusionLongPromptWeightingPipeline(OnnxStableDiffusionPipeline
            latents (`np.ndarray`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            max_embeddings_multiples (`int`, *optional*, defaults to `3`):
                The max multiple length of prompt embeddings compared to the max output length of text encoder.
            output_type (`str`, *optional*, defaults to `"pil"`):
@@ -918,7 +918,7 @@ class OnnxStableDiffusionLongPromptWeightingPipeline(OnnxStableDiffusionPipeline
            latents (`np.ndarray`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            max_embeddings_multiples (`int`, *optional*, defaults to `3`):
                The max multiple length of prompt embeddings compared to the max output length of text encoder.
            output_type (`str`, *optional*, defaults to `"pil"`):
@@ -1519,7 +1519,7 @@ class SDXLLongPromptWeightingPipeline(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            ip_adapter_image: (`PipelineImageInput`, *optional*):
                Optional image input to work with IP Adapters.
            prompt_embeds (`torch.Tensor`, *optional*):
@@ -187,7 +187,7 @@ class MultilingualStableDiffusion(DiffusionPipeline, StableDiffusionMixin):
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -888,7 +888,7 @@ class KolorsControlNetPipeline(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -1066,7 +1066,7 @@ class KolorsControlNetImg2ImgPipeline(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -1298,7 +1298,7 @@ class KolorsControlNetInpaintPipeline(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -724,7 +724,7 @@ class DemoFusionSDXLPipeline(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -1705,6 +1705,12 @@ class FaithDiffStableDiffusionXLPipeline(
        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
        processing larger images.
        """
+        depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
+        deprecate(
+            "enable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_tiling()
        self.unet.denoise_encoder.enable_tiling()

@@ -1713,6 +1719,12 @@ class FaithDiffStableDiffusionXLPipeline(
        Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
+        deprecate(
+            "disable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_tiling()
        self.unet.denoise_encoder.disable_tiling()

@@ -1906,7 +1918,7 @@ class FaithDiffStableDiffusionXLPipeline(
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -730,7 +730,7 @@ class FluxDifferentialImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
                1)`, or `(H, W)`.
            mask_image_latent (`torch.Tensor`, `List[torch.Tensor]`):
                `Tensor` representing an image batch to mask `image` generated by VAE. If not provided, the mask
-                latents tensor will ge generated by `mask_image`.
+                latents tensor will be generated by `mask_image`.
            height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                The height in pixels of the generated image. This is set to 1024 by default for the best results.
            width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
@@ -769,7 +769,7 @@ class FluxDifferentialImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -35,6 +35,7 @@ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
 from diffusers.utils import (
    USE_PEFT_BACKEND,
+    deprecate,
    is_torch_xla_available,
    logging,
    replace_example_docstring,
@@ -643,6 +644,12 @@ class FluxKontextPipeline(
        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
        processing larger images.
        """
+        depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
+        deprecate(
+            "enable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_tiling()

    # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_tiling
@@ -651,6 +658,12 @@ class FluxKontextPipeline(
        Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
+        deprecate(
+            "disable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_tiling()

    def preprocess_image(self, image: PipelineImageInput, _auto_resize: bool, multiple_of: int) -> torch.Tensor:
@@ -885,7 +898,7 @@ class FluxKontextPipeline(
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -30,6 +30,7 @@ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
 from diffusers.utils import (
    USE_PEFT_BACKEND,
+    deprecate,
    is_torch_xla_available,
    logging,
    replace_example_docstring,
@@ -526,6 +527,12 @@ class RFInversionFluxPipeline(
        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
        """
+        depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
+        deprecate(
+            "enable_vae_slicing",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_slicing()

    def disable_vae_slicing(self):
@@ -533,6 +540,12 @@ class RFInversionFluxPipeline(
        Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
+        deprecate(
+            "disable_vae_slicing",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_slicing()

    def enable_vae_tiling(self):
@@ -541,6 +554,12 @@ class RFInversionFluxPipeline(
        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
        processing larger images.
        """
+        depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
+        deprecate(
+            "enable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_tiling()

    def disable_vae_tiling(self):
@@ -548,6 +567,12 @@ class RFInversionFluxPipeline(
        Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
+        deprecate(
+            "disable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_tiling()

    def prepare_latents_inversion(
@@ -711,7 +736,7 @@ class RFInversionFluxPipeline(
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -35,6 +35,7 @@ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
 from diffusers.utils import (
    USE_PEFT_BACKEND,
+    deprecate,
    is_torch_xla_available,
    logging,
    replace_example_docstring,
@@ -702,6 +703,12 @@ class FluxSemanticGuidancePipeline(
        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
        processing larger images.
        """
+        depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
+        deprecate(
+            "enable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_tiling()

    # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_tiling
@@ -710,6 +717,12 @@ class FluxSemanticGuidancePipeline(
        Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
+        deprecate(
+            "disable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_tiling()

    # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.prepare_latents
@@ -853,7 +866,7 @@ class FluxSemanticGuidancePipeline(
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -28,6 +28,7 @@ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
 from diffusers.utils import (
    USE_PEFT_BACKEND,
+    deprecate,
    is_torch_xla_available,
    logging,
    replace_example_docstring,
@@ -503,6 +504,12 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
        """
+        depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
+        deprecate(
+            "enable_vae_slicing",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_slicing()

    def disable_vae_slicing(self):
@@ -510,6 +517,12 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
        Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
+        deprecate(
+            "disable_vae_slicing",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_slicing()

    def enable_vae_tiling(self):
@@ -518,6 +531,12 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
        processing larger images.
        """
+        depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
+        deprecate(
+            "enable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_tiling()

    def disable_vae_tiling(self):
@@ -525,6 +544,12 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
        Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
+        deprecate(
+            "disable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_tiling()

    def prepare_latents(
@@ -639,7 +664,7 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -904,7 +904,7 @@ class KolorsDifferentialImg2ImgPipeline(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -1246,7 +1246,7 @@ class KolorsInpaintPipeline(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -611,7 +611,7 @@ class Prompt2PromptPipeline(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -1480,7 +1480,7 @@ class StyleAlignedSDXLPipeline(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -29,11 +29,7 @@ from diffusers.models.transformers import SD3Transformer2DModel
 from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.pipelines.stable_diffusion_3.pipeline_output import StableDiffusion3PipelineOutput
 from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
-from diffusers.utils import (
-    is_torch_xla_available,
-    logging,
-    replace_example_docstring,
-)
+from diffusers.utils import is_torch_xla_available, logging, replace_example_docstring
 from diffusers.utils.torch_utils import randn_tensor


@@ -748,7 +744,7 @@ class StableDiffusion3DifferentialImg2ImgPipeline(DiffusionPipeline):
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -945,7 +945,7 @@ class StableDiffusion3InstructPix2PixPipeline(
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -504,6 +504,12 @@ class StableDiffusionBoxDiffPipeline(
        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
        """
+        depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
+        deprecate(
+            "enable_vae_slicing",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_slicing()

    def disable_vae_slicing(self):
@@ -511,6 +517,12 @@ class StableDiffusionBoxDiffPipeline(
        Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
+        deprecate(
+            "disable_vae_slicing",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_slicing()

    def enable_vae_tiling(self):
@@ -519,6 +531,12 @@ class StableDiffusionBoxDiffPipeline(
        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
        processing larger images.
        """
+        depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
+        deprecate(
+            "enable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_tiling()

    def disable_vae_tiling(self):
@@ -526,6 +544,12 @@ class StableDiffusionBoxDiffPipeline(
        Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
+        deprecate(
+            "disable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_tiling()

    def _encode_prompt(
@@ -471,6 +471,12 @@ class StableDiffusionPAGPipeline(
        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
        """
+        depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
+        deprecate(
+            "enable_vae_slicing",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_slicing()

    def disable_vae_slicing(self):
@@ -478,6 +484,12 @@ class StableDiffusionPAGPipeline(
        Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
+        deprecate(
+            "disable_vae_slicing",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_slicing()

    def enable_vae_tiling(self):
@@ -486,6 +498,12 @@ class StableDiffusionPAGPipeline(
        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
        processing larger images.
        """
+        depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
+        deprecate(
+            "enable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_tiling()

    def disable_vae_tiling(self):
@@ -493,6 +511,12 @@ class StableDiffusionPAGPipeline(
        Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
+        deprecate(
+            "disable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_tiling()

    def _encode_prompt(
@@ -1786,7 +1786,7 @@ class StableDiffusionXL_AE_Pipeline(
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -973,7 +973,7 @@ class StableDiffusionXLControlNetAdapterPipeline(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -1329,7 +1329,7 @@ class StableDiffusionXLControlNetAdapterInpaintPipeline(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -1053,7 +1053,7 @@ class StableDiffusionXLDifferentialImg2ImgPipeline(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -832,7 +832,7 @@ class StableDiffusionXLPipelineIpex(
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -632,7 +632,7 @@ class CogVideoXSTGPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -26,7 +26,7 @@ from diffusers.models import AutoencoderKLHunyuanVideo, HunyuanVideoTransformer3
 from diffusers.pipelines.hunyuan_video.pipeline_output import HunyuanVideoPipelineOutput
 from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
-from diffusers.utils import is_torch_xla_available, logging, replace_example_docstring
+from diffusers.utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
 from diffusers.utils.torch_utils import randn_tensor
 from diffusers.video_processor import VideoProcessor

@@ -481,6 +481,12 @@ class HunyuanVideoSTGPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
        """
+        depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
+        deprecate(
+            "enable_vae_slicing",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_slicing()

    def disable_vae_slicing(self):
@@ -488,6 +494,12 @@ class HunyuanVideoSTGPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
        Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
+        deprecate(
+            "disable_vae_slicing",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_slicing()

    def enable_vae_tiling(self):
@@ -496,6 +508,12 @@ class HunyuanVideoSTGPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
        processing larger images.
        """
+        depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
+        deprecate(
+            "enable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_tiling()

    def disable_vae_tiling(self):
@@ -503,6 +521,12 @@ class HunyuanVideoSTGPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
        Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
+        deprecate(
+            "disable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_tiling()

    @property
@@ -620,7 +620,7 @@ class LTXSTGPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLoraLoaderM
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -682,7 +682,7 @@ class LTXImageToVideoSTGPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVide
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -26,11 +26,7 @@ from diffusers.models import AutoencoderKLMochi, MochiTransformer3DModel
 from diffusers.pipelines.mochi.pipeline_output import MochiPipelineOutput
 from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
-from diffusers.utils import (
-    is_torch_xla_available,
-    logging,
-    replace_example_docstring,
-)
+from diffusers.utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
 from diffusers.utils.torch_utils import randn_tensor
 from diffusers.video_processor import VideoProcessor

@@ -458,6 +454,12 @@ class MochiSTGPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
        """
+        depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
+        deprecate(
+            "enable_vae_slicing",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_slicing()

    def disable_vae_slicing(self):
@@ -465,6 +467,12 @@ class MochiSTGPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
        Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
+        deprecate(
+            "disable_vae_slicing",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_slicing()

    def enable_vae_tiling(self):
@@ -473,6 +481,12 @@ class MochiSTGPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
        processing larger images.
        """
+        depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
+        deprecate(
+            "enable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.enable_tiling()

    def disable_vae_tiling(self):
@@ -480,6 +494,12 @@ class MochiSTGPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
        Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
        computing decoding in one step.
        """
+        depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
+        deprecate(
+            "disable_vae_tiling",
+            "0.40.0",
+            depr_message,
+        )
        self.vae.disable_tiling()

    def prepare_latents(
@@ -603,7 +623,7 @@ class MochiSTGPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -657,7 +657,7 @@ class Zero1to3StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -656,7 +656,7 @@ class RerenderAVideoPipeline(StableDiffusionControlNetImg2ImgPipeline):
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -591,7 +591,7 @@ class OnnxStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -695,7 +695,7 @@ class TensorRTStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -326,7 +326,7 @@ class StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -122,7 +122,7 @@ class SeedResizeStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin)
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -279,7 +279,7 @@ class StableDiffusionComparisonPipeline(DiffusionPipeline, StableDiffusionMixin)
            latents (`torch.Tensor`, optional):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            output_type (`str`, optional, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -670,7 +670,7 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, StableDiffusio
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -810,7 +810,7 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -804,7 +804,7 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
@@ -179,7 +179,7 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
-                tensor will ge generated by sampling using the supplied random `generator`.
+                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
sayakpaul	92199ff3ac	up	2025-09-22 16:46:49 +05:30
sayakpaul	04e9323055	up	2025-09-18 17:23:04 +05:30
sayakpaul	9a09162baf	up	2025-09-18 14:59:00 +05:30
sayakpaul	33a8a3be0c	up	2025-09-18 14:49:48 +05:30
sayakpaul	58743c3ee7	kernelize gelu.	2025-09-16 18:09:12 +05:30
sayakpaul	50c0b786d2	start kernelize.	2025-09-15 16:26:52 +05:30
Daniel Socek	f5c113e439	Use SDP on BF16 in GPU/HPU migration (#12310 ) * Use SDP on BF16 in GPU/HPU migration Signed-off-by: Daniel Socek <daniel.socek@intel.com> * Formatting fix for enabling SDP with BF16 precision on HPU Signed-off-by: Daniel Socek <daniel.socek@intel.com> --------- Signed-off-by: Daniel Socek <daniel.socek@intel.com>	2025-09-12 08:00:36 -10:00
Sayak Paul	5e181eddfe	Deprecate slicing and tiling methods from `DiffusionPipeline` (#12271 ) * deprecate slicing from flux pipeline. * propagate. * tiling * up * up	2025-09-11 10:04:35 +05:30
Justin Ruan	55f0b3d758	Fix AttributeError of `VisualClozeProcessor` (#12121 ) Co-authored-by: YiYi Xu <yixu310@gmail.com>	2025-09-11 04:17:34 +05:30
Sayak Paul	eb7ef26736	[quant] allow `components_to_quantize` to be a non-list for single components (#12234 ) * allow non list components_to_quantize. * up * Apply suggestions from code review * Apply suggestions from code review Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> * [docs] components_to_quantize (#12287) init Co-authored-by: Sayak Paul <spsayakpaul@gmail.com> --------- Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>	2025-09-10 09:47:08 -10:00
ttio2tech	e1b7f1f240	fix for the qwen controlnet pipeline - wrong device can be used (#12309 ) fix the device for textencoder	2025-09-10 08:59:08 -10:00
Sayak Paul	9e7ae568d6	[feat] cache allocator warmup for `from_single_model` (#12305 ) * add * add a test	2025-09-10 12:55:32 +05:30
Sayak Paul	f7b79452b4	[modular] fix flux modular pipelines for t2i and i2i (#12272 ) fix flux modular pipelines for t2i and i2i	2025-09-10 12:39:55 +05:30
Sayak Paul	43459079ab	[core] feat: support group offloading at the pipeline level (#12283 ) * feat: support group offloading at the pipeline level. * add tests * up * [docs] Pipeline group offloading (#12286) init Co-authored-by: Sayak Paul <spsayakpaul@gmail.com> --------- Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>	2025-09-10 09:09:57 +05:30
kaixuanliu	4067d6c4b6	adjust criteria for marigold-intrinsics example on XPU (#12290 ) adjust criteria for XPU Signed-off-by: Liu, Kaixuan <kaixuan.liu@intel.com> Co-authored-by: Aryan <aryan@huggingface.co>	2025-09-10 03:06:03 +05:30
calcuis	28106fcac4	gguf new quant type support (with demo) (#12076 ) * Update utils.py not perfect but works engine: https://github.com/calcuis/gguf-connector/blob/main/src/gguf_connector/quant2c.py inference example(s): https://github.com/calcuis/gguf-connector/blob/main/src/gguf_connector/k6.py https://github.com/calcuis/gguf-connector/blob/main/src/gguf_connector/k5.py gguf file sample(s): https://huggingface.co/calcuis/kontext-gguf/tree/main https://huggingface.co/calcuis/krea-gguf/tree/main * Apply style fixes --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2025-09-09 17:10:21 +05:30
Leo Jiang	c222570a9b	DeepSpeed adaption for flux-kontext (#12240 ) Co-authored-by: J石页 <jiangshuo9@h-partners.com> Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>	2025-09-09 12:58:08 +05:30
Frank (Haofan) Wang	4e36bb0d23	Support ControlNet-Inpainting for Qwen-Image (#12301 ) * add qwen-image-cn-inpaint --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: yiyixuxu <yixu310@gmail.com>	2025-09-08 14:59:26 -10:00
YiYi Xu	f50b18eec7	[Modular] Qwen (#12220 ) * add qwen modular	2025-09-08 00:27:02 -10:00
Steven Liu	fc337d5853	[docs] Models (#12248 ) * init * fix * feedback * feedback	2025-09-05 11:52:09 -07:00
Steven Liu	32798bf242	[docs] Inference section cleanup (#12281 ) init Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>	2025-09-05 09:34:37 -07:00
Steven Liu	c2e5ece08b	[docs] Sharing pipelines/models (#12280 ) init	2025-09-04 11:43:47 -07:00
co63oc	764b62473a	fix some typos (#12265 ) Signed-off-by: co63oc <co63oc@users.noreply.github.com>	2025-09-03 21:28:24 +05:30
Ju Hoon Park	6682956333	Add AttentionMixin to WanVACETransformer3DModel (#12268 ) * Add AttentionMixin to WanVACETransformer3DModel to enable methods like `set_attn_processor()`. * Import AttentionMixin in transformer_wan_vace.py Special thanks to @tolgacangoz 🙇‍♂️	2025-09-03 15:05:41 +05:30
Sayak Paul	ffc8c0c1e1	[tests] feat: add AoT compilation tests (#12203 ) * feat: add a test for aot. * up	2025-09-03 11:15:27 +05:30
Ishan Modi	4acbfbf13b	[Quantization] Add TRT-ModelOpt as a Backend (#11173 ) * initial commit * update * updates * update * update * update * update * update * update * addressed PR comments * update * addressed PR comments * update * update * update * update * update * update * updates * update * update * addressed PR comments * updates * code formatting * update * addressed PR comments * addressed PR comments * addressed PR comments * addressed PR comments * fix docs and dependencies * fixed dependency test --------- Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>	2025-09-03 10:14:52 +05:30
Steven Liu	6549b04ec6	[docs] AutoPipeline (#12160 ) * refresh * feedback * feedback * supported models * fix	2025-09-02 21:06:26 -07:00
Sayak Paul	130fd8df54	[core] use `kernels` to support `_flash_3_hub` attention backend (#12236 ) * feat: try loading fa3 using kernels when available. * up * change to Hub. * up * up * up * switch env var. * up * up * up * up * up * up	2025-09-03 08:48:07 +05:30
Dhruv Nair	bcd4d77ba6	[CI] Remove big accelerator requirements from Quanto Tests (#12266 ) update	2025-09-03 08:29:31 +05:30
Linoy Tsaban	006d092751	[Flux LoRA] fix for prior preservation and mixed precision sampling, follow up on #11873 (#12264 ) * propagate fixes from https://github.com/huggingface/diffusers/pull/11873/ to flux script * propagate fixes from https://github.com/huggingface/diffusers/pull/11873/ to flux script * propagate fixes from https://github.com/huggingface/diffusers/pull/11873/ to flux script * Apply style fixes --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2025-09-02 11:30:33 +03:00
Ziheng Zhang	9e4a75b142	[docs] Fix VAE scale factor calculation in distributed inference docs (#12259 ) docs: Fix VAE scale factor calculation	2025-09-01 16:34:16 -10:00
Bulat Akhmatov	0ff1aa910c	[fix] fix for prior preservation and mixed precision sampling (#11873 ) Co-authored-by: Linoy Tsaban <57615435+linoytsaban@users.noreply.github.com> Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>	2025-09-01 14:12:14 +03:00
apolinário	901da9dccc	Fix lora conversion function for ai-toolkit Qwen Image LoRAs (#12261 ) * Fix lora conversion function for ai-toolkit Qwen Image LoRAs * add forgotten parenthesis * remove space new line * update pipeline * detect if arrow or letter * remove whitespaces * style * apply suggestion * apply suggestion * apply suggestion --------- Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>	2025-09-01 14:24:38 +05:30
Nguyễn Trọng Tuấn	67ffa7031e	Add Qwen-Image-Edit Inpainting pipeline (#12225 ) * add qwenimage-edit inpaint feature * stay up to date with main branch * fix style * fix docs * copies * fix * again * copies --------- Co-authored-by: “Trgtuan10” <“tuannguyentrong.402@gmail.com”> Co-authored-by: TuanNT-ZenAI <tuannt.zenai@gmail.com> Co-authored-by: yiyixuxu <yixu310@gmail.com>	2025-08-30 19:49:15 -10:00
Leo Jiang	827fad66a0	Improve performance of NPU FA (#12260 ) Co-authored-by: J石页 <jiangshuo9@h-partners.com> Co-authored-by: Aryan <aryan@huggingface.co>	2025-08-31 01:48:51 +05:30
Nguyễn Trọng Tuấn	9b721db205	[QwenImageEditPipeline] Add image entry in __call__ function (#12254 ) add entry Co-authored-by: TuanNT-ZenAI <tuannt.zenai@gmail.com>	2025-08-29 20:16:43 -10:00
Dhruv Nair	ba0e732eb0	[Modular] Consolidate `load_default_components` into `load_components` (#12217 ) * update * Apply style fixes * update * update --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2025-08-28 19:55:02 +05:30
Dhruv Nair	b2da59b197	[Modular] Provide option to disable custom code loading globally via env variable (#12177 ) * update * update * update * update	2025-08-28 19:54:32 +05:30
Dhruv Nair	7aa6af1138	[Refactor] Move testing utils out of src (#12238 ) * update * update * update * update * update * merge main * Revert "merge main" This reverts commit `65efbcead5`.	2025-08-28 19:53:02 +05:30
Aryan	87b800e154	[modular diffusers] Fix AutoGuidance validation (#12247 ) fix	2025-08-28 15:23:26 +05:30
YiYi Xu	e58711e73c	[Modular] support standard repo (#11944 ) * make modular pipeline work with model_index.json * up * style * up * up * style * up more * Fix MultiControlNet import (#12118) fix --------- Co-authored-by: Álvaro Somoza <asomoza@users.noreply.github.com> Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>	2025-08-28 10:18:07 +02:00
Steven Liu	cbecc33570	[docs] Reproducibility (#12237 ) * init * dupe * feedback	2025-08-27 11:35:31 -07:00
Steven Liu	5237a82a35	[docs] Remove Flax (#12244 ) * remove flax * toctree * feedback	2025-08-27 11:11:07 -07:00
Parag Ekbote	513dbdb2f3	Fix Table Rendering in ReadME (#12245 ) fix table rendering readme issue in readme.	2025-08-27 10:44:49 -07:00
YiYi Xu	865ba102b3	[Qwen-Image] adding validation for guidance_scale, true_cfg_scale and negative_prompt (#12223 ) * up	2025-08-27 01:04:33 -10:00
Manith Ratnayake	552c127c05	docs: correct typos in using-diffusers/other-formats (#12243 )	2025-08-26 08:48:05 -07:00
Tianqi Tang	4b7fe044e3	Fix typos and inconsistencies (#12204 ) Fix typos and test assertions Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>	2025-08-26 07:58:08 -07:00
Sayak Paul	532f41c999	Deprecate Flax support (#12151 ) * start removing flax stuff. * add deprecation warning. * add warning messages. * more warnings. * remove dockerfiles. * remove more. * Update src/diffusers/models/attention_flax.py Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com> * up --------- Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>	2025-08-26 09:58:16 +02:00
Tolga Cangöz	5fcd5f560f	Propose to update & upgrade SkyReels-V2 (#12167 ) * fix: update SkyReels-V2 documentation and moving into attn dispatcher * Refactors SkyReelsV2's attention implementation * style * up * Fixes formatting in SkyReels-V2 documentation Wraps the visual demonstration section in a Markdown code block. This change corrects the rendering of ASCII diagrams and examples, improving the overall readability of the document. * Docs: Condense example arrays in skyreels_v2 guide Improves the readability of the `step_matrix` examples by replacing long sequences of repeated numbers with a more compact `value×count` notation. This change makes the underlying data patterns in the examples easier to understand at a glance. * Add _repeated_blocks attribute to SkyReelsV2Transformer3DModel * Refactor rotary embedding calculations in SkyReelsV2 to separate cosine and sine frequencies * Enhance SkyReels-V2 documentation: update model loading for GPU support and remove outdated notes * up * up * Update model_id in SkyReels-V2 documentation * up * refactor: remove device_map parameter for model loading and add pipeline.to("cuda") for GPU allocation * fix: update copyright year to 2025 in skyreels_v2.md * docs: enhance parameter examples and formatting in skyreels_v2.md * docs: update example formatting and add notes on LoRA support in skyreels_v2.md * refactor: remove copied comments from transformer_wan in SkyReelsV2 classes * Clean up comments in skyreels_v2.md Removed comments about acceleration helpers and Flash Attention installation. * Add deprecation warning for `SkyReelsV2AttnProcessor2_0` class	2025-08-26 12:54:19 +05:30
Leo Jiang	0fd7ee79ea	NPU attention refactor for FLUX (#12209 ) * NPU attention refactor for FLUX transformer * Apply style fixes --------- Co-authored-by: J石页 <jiangshuo9@h-partners.com> Co-authored-by: Aryan <aryan@huggingface.co> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2025-08-26 12:53:55 +05:30
sqt	0d1c5b0c3e	Fix typo: 'will ge generated' -> 'will be generated' (#12231 )	2025-08-25 12:47:52 -07:00
Meta	0e46c55931	Update README.md (#12193 )	2025-08-25 11:35:56 -07:00
Manith Ratnayake	8f8888a76e	[docs] typo : corrected 'compile regions' to 'compile_regions' (#12199 ) [docs] typo: corrected 'compile regions' to 'compile_regions'	2025-08-25 11:35:48 -07:00
Cyan	afc9721898	Fix typo in LoRA (#12228 ) Fix formatting in using_peft_for_inference.md	2025-08-25 11:19:55 -07:00
Steven Liu	2c4ee10b77	[docs] Diffusion pipeline (#12148 ) * init * refactor * refresh * fix? * fix? * fix * fix-copies * feedback * feedback * fix * feedback	2025-08-25 11:06:12 -07:00
Sayak Paul	cf1ca728ea	fix title for compile + offload quantized models (#12233 ) * up * up * Apply suggestions from code review Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --------- Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>	2025-08-25 17:42:06 +02:00
Sayak Paul	144e6e2540	[docs] change wan2.1 -> wan (#12230 ) * change wan2.1 -> wan * up	2025-08-25 17:30:12 +02:00
Sadhvi	22b229ba66	added a fast test for Qwen-Image Controlnet Pipeline (#12226 ) * added test qwen image controlnet * Apply style fixes * added test qwenimage multicontrolnet * Apply style fixes --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2025-08-24 15:58:21 -10:00
Aryan	a840c39ad8	[refactor] Make guiders return their inputs (#12213 ) * update * update * apply review suggestions * remove guider inputs * fix tests	2025-08-23 06:48:55 -10:00
Aishwarya Badlani	9a7ae77a4e	Fix PyTorch 2.3.1 compatibility: add version guard for torch.library.… (#12206 ) * Fix PyTorch 2.3.1 compatibility: add version guard for torch.library.custom_op - Add hasattr() check for torch.library.custom_op and register_fake - These functions were added in PyTorch 2.4, causing import failures in 2.3.1 - Both decorators and functions are now properly guarded with version checks - Maintains backward compatibility while preserving functionality Fixes #12195 * Use dummy decorators approach for PyTorch version compatibility - Replace hasattr check with version string comparison - Add no-op decorator functions for PyTorch < 2.4.0 - Follows pattern from #11941 as suggested by reviewer - Maintains cleaner code structure without indentation changes * Update src/diffusers/models/attention_dispatch.py Update all the decorator usages Co-authored-by: Aryan <contact.aryanvs@gmail.com> * Update src/diffusers/models/attention_dispatch.py Co-authored-by: Aryan <contact.aryanvs@gmail.com> * Update src/diffusers/models/attention_dispatch.py Co-authored-by: Aryan <contact.aryanvs@gmail.com> * Update src/diffusers/models/attention_dispatch.py Co-authored-by: Aryan <contact.aryanvs@gmail.com> * Move version check to top of file and use private naming as requested * Apply style fixes --------- Co-authored-by: Aryan <contact.aryanvs@gmail.com> Co-authored-by: Aryan <aryan@huggingface.co> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2025-08-23 12:52:09 +05:30
Sayak Paul	673d4357ff	add attentionmixin to qwen image (#12219 )	2025-08-23 04:48:32 +05:30
Frank (Haofan) Wang	561ab54de3	Support ControlNet for Qwen-Image (#12215 ) * support qwen-image-cn-union --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: YiYi Xu <yixu310@gmail.com>	2025-08-22 11:00:01 -10:00
Steven Liu	b60faf456b	[docs] Pipeline callbacks (#12212 ) * init * review	2025-08-22 13:01:24 -07:00
Steven Liu	3e73dc24a4	[docs] Community pipelines (#12201 ) * refresh * feedback	2025-08-22 10:42:13 -07:00