[Styling] stylify using ruff (#5841)

* ruff format * not need to use doc-builder's black styling as the doc is styled in ruff * make fix-copies * comment * use run_ruff
2023-11-20 11:48:34 +01:00 · 2023-11-20 11:48:34 +01:00 · 6b04d61cf6
commit 6b04d61cf6
parent 9c7f7fc475
134 changed files with 430 additions and 297 deletions
--- a/.github/workflows/pr_quality.yml
+++ b/.github/workflows/pr_quality.yml
@ -27,9 +27,8 @@ jobs:
          pip install .[quality]
      - name: Check quality
        run: |
-          black --check examples tests src utils scripts
-          ruff examples tests src utils scripts
-          doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source
+          ruff check examples tests src utils scripts
+          ruff format examples tests src utils scripts --check

  check_repository_consistency:
    runs-on: ubuntu-latest
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -410,7 +410,7 @@ Diffusers has grown a lot. Here is the command for it:
 $ make test
 ```

-🧨 Diffusers relies on `black` and `isort` to format its source code
+🧨 Diffusers relies on `ruff` and `isort` to format its source code
 consistently. After you make changes, apply automatic style corrections and code verifications
 that can't be automated in one go with:

--- a/14
+++ b/14
@ -9,8 +9,8 @@ modified_only_fixup:
 	$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
 	@if test -n "$(modified_py_files)"; then \
 		echo "Checking/fixing $(modified_py_files)"; \
-		black $(modified_py_files); \
-		ruff $(modified_py_files); \
+		ruff check $(modified_py_files) --fix; \
+		ruff format $(modified_py_files);\
 	else \
 		echo "No library .py files were modified"; \
 	fi
@ -40,23 +40,21 @@ repo-consistency:
 # this target runs checks on all files

 quality:
-	black --check $(check_dirs)
-	ruff $(check_dirs)
-	doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source
+	ruff check $(check_dirs) setup.py
+	ruff format --check $(check_dirs) setup.py 
 	python utils/check_doc_toc.py

 # Format source code automatically and check is there are any problems left that need manual fixing

 extra_style_checks:
 	python utils/custom_init_isort.py
-	doc-builder style src/diffusers docs/source --max_len 119 --path_to_docs docs/source
 	python utils/check_doc_toc.py --fix_and_overwrite

 # this target runs checks on all files and potentially modifies some of them

 style:
-	black $(check_dirs)
-	ruff $(check_dirs) --fix
+	ruff check $(check_dirs) setup.py --fix
+	ruff format $(check_dirs) setup.py
 	${MAKE} autogenerate_code
 	${MAKE} extra_style_checks

--- a/examples/community/composable_stable_diffusion.py
+++ b/examples/community/composable_stable_diffusion.py
@ -65,6 +65,7 @@ class ComposableStableDiffusionPipeline(DiffusionPipeline):
        feature_extractor ([`CLIPImageProcessor`]):
            Model that extracts features from generated images to be used as inputs for the `safety_checker`.
    """
+
    _optional_components = ["safety_checker", "feature_extractor"]

    def __init__(
--- a/examples/community/latent_consistency_img2img.py
+++ b/examples/community/latent_consistency_img2img.py
@ -564,9 +564,7 @@ class LCMSchedulerWithTimestamp(SchedulerMixin, ConfigMixin):
            self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
        elif beta_schedule == "scaled_linear":
            # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
        elif beta_schedule == "squaredcos_cap_v2":
            # Glide cosine schedule
            self.betas = betas_for_alpha_bar(num_train_timesteps)
--- a/examples/community/latent_consistency_txt2img.py
+++ b/examples/community/latent_consistency_txt2img.py
@ -469,9 +469,7 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
            self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
        elif beta_schedule == "scaled_linear":
            # this schedule is very specific to the latent diffusion model.
-            self.betas = (
-                torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
-            )
+            self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
        elif beta_schedule == "squaredcos_cap_v2":
            # Glide cosine schedule
            self.betas = betas_for_alpha_bar(num_train_timesteps)
--- a/examples/community/lpw_stable_diffusion.py
+++ b/examples/community/lpw_stable_diffusion.py
@ -56,10 +56,10 @@ def parse_prompt_attention(text):
      (abc) - increases attention to abc by a multiplier of 1.1
      (abc:3.12) - increases attention to abc by a multiplier of 3.12
      [abc] - decreases attention to abc by a multiplier of 1.1
-      \( - literal character '('
-      \[ - literal character '['
-      \) - literal character ')'
-      \] - literal character ']'
+      \\( - literal character '('
+      \\[ - literal character '['
+      \\) - literal character ')'
+      \\] - literal character ']'
      \\ - literal character '\'
      anything else - just text
    >>> parse_prompt_attention('normal text')
@ -68,7 +68,7 @@ def parse_prompt_attention(text):
    [['an ', 1.0], ['important', 1.1], [' word', 1.0]]
    >>> parse_prompt_attention('(unbalanced')
    [['unbalanced', 1.1]]
-    >>> parse_prompt_attention('\(literal\]')
+    >>> parse_prompt_attention('\\(literal\\]')
    [['(literal]', 1.0]]
    >>> parse_prompt_attention('(unnecessary)(parens)')
    [['unnecessaryparens', 1.1]]
--- a/examples/community/lpw_stable_diffusion_onnx.py
+++ b/examples/community/lpw_stable_diffusion_onnx.py
@ -82,10 +82,10 @@ def parse_prompt_attention(text):
      (abc) - increases attention to abc by a multiplier of 1.1
      (abc:3.12) - increases attention to abc by a multiplier of 3.12
      [abc] - decreases attention to abc by a multiplier of 1.1
-      \( - literal character '('
-      \[ - literal character '['
-      \) - literal character ')'
-      \] - literal character ']'
+      \\( - literal character '('
+      \\[ - literal character '['
+      \\) - literal character ')'
+      \\] - literal character ']'
      \\ - literal character '\'
      anything else - just text
    >>> parse_prompt_attention('normal text')
@ -94,7 +94,7 @@ def parse_prompt_attention(text):
    [['an ', 1.0], ['important', 1.1], [' word', 1.0]]
    >>> parse_prompt_attention('(unbalanced')
    [['unbalanced', 1.1]]
-    >>> parse_prompt_attention('\(literal\]')
+    >>> parse_prompt_attention('\\(literal\\]')
    [['(literal]', 1.0]]
    >>> parse_prompt_attention('(unnecessary)(parens)')
    [['unnecessaryparens', 1.1]]
@ -433,6 +433,7 @@ class OnnxStableDiffusionLongPromptWeightingPipeline(OnnxStableDiffusionPipeline
    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
    """
+
    if version.parse(version.parse(diffusers.__version__).base_version) >= version.parse("0.9.0"):

        def __init__(
--- a/examples/community/lpw_stable_diffusion_xl.py
+++ b/examples/community/lpw_stable_diffusion_xl.py
@ -46,10 +46,10 @@ def parse_prompt_attention(text):
      (abc) - increases attention to abc by a multiplier of 1.1
      (abc:3.12) - increases attention to abc by a multiplier of 3.12
      [abc] - decreases attention to abc by a multiplier of 1.1
-      \( - literal character '('
-      \[ - literal character '['
-      \) - literal character ')'
-      \] - literal character ']'
+      \\( - literal character '('
+      \\[ - literal character '['
+      \\) - literal character ')'
+      \\] - literal character ']'
      \\ - literal character '\'
      anything else - just text

@ -59,7 +59,7 @@ def parse_prompt_attention(text):
    [['an ', 1.0], ['important', 1.1], [' word', 1.0]]
    >>> parse_prompt_attention('(unbalanced')
    [['unbalanced', 1.1]]
-    >>> parse_prompt_attention('\(literal\]')
+    >>> parse_prompt_attention('\\(literal\\]')
    [['(literal]', 1.0]]
    >>> parse_prompt_attention('(unnecessary)(parens)')
    [['unnecessaryparens', 1.1]]
--- a/examples/community/magic_mix.py
+++ b/examples/community/magic_mix.py
@ -127,9 +127,9 @@ class MagicMixPipeline(DiffusionPipeline):
                        timesteps=t,
                    )

-                    input = (mix_factor * latents) + (
-                        1 - mix_factor
-                    ) * orig_latents  # interpolating between layout noise and conditionally generated noise to preserve layout sematics
+                    input = (
+                        (mix_factor * latents) + (1 - mix_factor) * orig_latents
+                    )  # interpolating between layout noise and conditionally generated noise to preserve layout sematics
                    input = torch.cat([input] * 2)

                else:  # content generation phase
--- a/examples/community/mixture_canvas.py
+++ b/examples/community/mixture_canvas.py
@ -453,9 +453,7 @@ class StableDiffusionCanvasPipeline(DiffusionPipeline):
                    :,
                    region.latent_row_init : region.latent_row_end,
                    region.latent_col_init : region.latent_col_end,
-                ] += (
-                    noise_pred_region * mask_weights_region
-                )
+                ] += noise_pred_region * mask_weights_region
                contributors[
                    :,
                    :,
--- a/examples/community/pipeline_prompt2prompt.py
+++ b/examples/community/pipeline_prompt2prompt.py
@ -65,6 +65,7 @@ class Prompt2PromptPipeline(StableDiffusionPipeline):
        feature_extractor ([`CLIPFeatureExtractor`]):
            Model that extracts features from generated images to be used as inputs for the `safety_checker`.
    """
+
    _optional_components = ["safety_checker", "feature_extractor"]

    @torch.no_grad()
--- a/examples/community/pipeline_zero1to3.py
+++ b/examples/community/pipeline_zero1to3.py
@ -94,6 +94,7 @@ class Zero1to3StableDiffusionPipeline(DiffusionPipeline):
        cc_projection ([`CCProjection`]):
            Projection layer to project the concated CLIP features and pose embeddings to the original CLIP feature size.
    """
+
    _optional_components = ["safety_checker", "feature_extractor"]

    def __init__(
@ -658,7 +659,8 @@ class Zero1to3StableDiffusionPipeline(DiffusionPipeline):

        if isinstance(generator, list):
            init_latents = [
-                self.vae.encode(image[i : i + 1]).latent_dist.mode(generator[i]) for i in range(batch_size)  # sample
+                self.vae.encode(image[i : i + 1]).latent_dist.mode(generator[i])
+                for i in range(batch_size)  # sample
            ]
            init_latents = torch.cat(init_latents, dim=0)
        else:
--- a/examples/community/run_onnx_controlnet.py
+++ b/examples/community/run_onnx_controlnet.py
@ -651,9 +651,10 @@ class OnnxStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
            mult = num_controlnet
-            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
-                control_guidance_end
-            ]
+            control_guidance_start, control_guidance_end = (
+                mult * [control_guidance_start],
+                mult * [control_guidance_end],
+            )

        # 1. Check inputs. Raise error if not correct
        self.check_inputs(
--- a/examples/community/run_tensorrt_controlnet.py
+++ b/examples/community/run_tensorrt_controlnet.py
@ -755,9 +755,10 @@ class TensorRTStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
            mult = num_controlnet
-            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
-                control_guidance_end
-            ]
+            control_guidance_start, control_guidance_end = (
+                mult * [control_guidance_start],
+                mult * [control_guidance_end],
+            )

        # 1. Check inputs. Raise error if not correct
        self.check_inputs(
--- a/examples/community/sd_text2img_k_diffusion.py
+++ b/examples/community/sd_text2img_k_diffusion.py
@ -68,6 +68,7 @@ class StableDiffusionPipeline(DiffusionPipeline):
        feature_extractor ([`CLIPImageProcessor`]):
            Model that extracts features from generated images to be used as inputs for the `safety_checker`.
    """
+
    _optional_components = ["safety_checker", "feature_extractor"]

    def __init__(
--- a/examples/community/stable_diffusion_ipex.py
+++ b/examples/community/stable_diffusion_ipex.py
@ -89,6 +89,7 @@ class StableDiffusionIPEXPipeline(DiffusionPipeline, TextualInversionLoaderMixin
        feature_extractor ([`CLIPFeatureExtractor`]):
            Model that extracts features from generated images to be used as inputs for the `safety_checker`.
    """
+
    _optional_components = ["safety_checker", "feature_extractor"]

    def __init__(
--- a/examples/community/stable_diffusion_mega.py
+++ b/examples/community/stable_diffusion_mega.py
@ -50,6 +50,7 @@ class StableDiffusionMegaPipeline(DiffusionPipeline):
        feature_extractor ([`CLIPImageProcessor`]):
            Model that extracts features from generated images to be used as inputs for the `safety_checker`.
    """
+
    _optional_components = ["safety_checker", "feature_extractor"]

    def __init__(
--- a/examples/community/stable_diffusion_repaint.py
+++ b/examples/community/stable_diffusion_repaint.py
@ -170,6 +170,7 @@ class StableDiffusionRepaintPipeline(DiffusionPipeline, TextualInversionLoaderMi
        feature_extractor ([`CLIPImageProcessor`]):
            Model that extracts features from generated images to be used as inputs for the `safety_checker`.
    """
+
    _optional_components = ["safety_checker", "feature_extractor"]

    def __init__(
--- a/examples/research_projects/colossalai/train_dreambooth_colossalai.py
+++ b/examples/research_projects/colossalai/train_dreambooth_colossalai.py
@ -464,9 +464,7 @@ def main(args):
    unet = gemini_zero_dpp(unet, args.placement)

    # config optimizer for colossalai zero
-    optimizer = GeminiAdamOptimizer(
-        unet, lr=args.learning_rate, initial_scale=2**5, clipping_norm=args.max_grad_norm
-    )
+    optimizer = GeminiAdamOptimizer(unet, lr=args.learning_rate, initial_scale=2**5, clipping_norm=args.max_grad_norm)

    # load noise_scheduler
    noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,10 +1,6 @@
-[tool.black]
-line-length = 119
-target-version = ['py37']
-
 [tool.ruff]
 # Never enforce `E501` (line length violations).
-ignore = ["C901", "E501", "E741", "W605"]
+ignore = ["C901", "E501", "E741", "F402", "F823"]
 select = ["C", "E", "F", "I", "W"]
 line-length = 119

@ -16,3 +12,16 @@ line-length = 119
 [tool.ruff.isort]
 lines-after-imports = 2
 known-first-party = ["diffusers"]
+
+[tool.ruff.format]
+# Like Black, use double quotes for strings.
+quote-style = "double"
+
+# Like Black, indent with spaces, rather than tabs.
+indent-style = "space"
+
+# Like Black, respect magic trailing commas.
+skip-magic-trailing-comma = false
+
+# Like Black, automatically detect the appropriate line ending.
+line-ending = "auto"
--- a/scripts/convert_kakao_brain_unclip_to_diffusers.py
+++ b/scripts/convert_kakao_brain_unclip_to_diffusers.py
@ -11,7 +11,7 @@ from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel
 from diffusers.schedulers.scheduling_unclip import UnCLIPScheduler


-"""
+r"""
 Example - From the diffusers root directory:

 Download weights:
--- a/setup.cfg
+++ b/setup.cfg
@ -1,20 +0,0 @@
-[isort]
-default_section = FIRSTPARTY
-ensure_newline_before_comments = True
-force_grid_wrap = 0
-include_trailing_comma = True
-known_first_party = accelerate
-known_third_party =
-    numpy
-    torch
-    torch_xla
-
-line_length = 119
-lines_after_imports = 2
-multi_line_output = 3
-use_parentheses = True
-
-[flake8]
-ignore = E203, E722, E501, E741, W503, W605
-max-line-length = 119
-per-file-ignores = __init__.py:F401
--- a/setup.py
+++ b/setup.py
@ -44,9 +44,9 @@ To create the package for PyPI.
   For the sources, run: "python setup.py sdist"
   You should now have a /dist directory with both .whl and .tar.gz source versions.

-   Long story cut short, you need to run both before you can upload the distribution to the 
-   test PyPI and the actual PyPI servers: 
-   
+   Long story cut short, you need to run both before you can upload the distribution to the
+   test PyPI and the actual PyPI servers:
+
   python setup.py bdist_wheel && python setup.py sdist

 8. Check that everything looks correct by uploading the package to the PyPI test server:
@ -78,9 +78,9 @@ To create the package for PyPI.
    you need to go back to main before executing this.
 """

-import sys
 import os
 import re
+import sys
 from distutils.core import Command

 from setuptools import find_packages, setup
@ -93,7 +93,6 @@ _deps = [
    "Pillow",  # keep the PIL.Image.Resampling deprecation away
    "accelerate>=0.11.0",
    "compel==0.1.8",
-    "black~=23.1",
    "datasets",
    "filelock",
    "flax>=0.4.1",
@ -119,7 +118,7 @@ _deps = [
    "pytest-timeout",
    "pytest-xdist",
    "python>=3.8.0",
-    "ruff==0.0.280",
+    "ruff>=0.1.5,<=0.2",
    "safetensors>=0.3.1",
    "sentencepiece>=0.1.91,!=0.1.92",
    "scipy",
@ -171,7 +170,11 @@ class DepsTableUpdateCommand(Command):
    description = "build runtime dependency table"
    user_options = [
        # format: (long option, short option, description).
-        ("dep-table-update", None, "updates src/diffusers/dependency_versions_table.py"),
+        (
+            "dep-table-update",
+            None,
+            "updates src/diffusers/dependency_versions_table.py",
+        ),
    ]

    def initialize_options(self):
@ -197,10 +200,8 @@ class DepsTableUpdateCommand(Command):
            f.write("\n".join(content))


-
-
 extras = {}
-extras["quality"] = deps_list("urllib3", "black", "isort", "ruff", "hf-doc-builder")
+extras["quality"] = deps_list("urllib3", "isort", "ruff", "hf-doc-builder")
 extras["docs"] = deps_list("hf-doc-builder")
 extras["training"] = deps_list("accelerate", "datasets", "protobuf", "tensorboard", "Jinja2")
 extras["test"] = deps_list(
@ -275,10 +276,7 @@ setup(
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Programming Language :: Python :: 3",
    ]
-    + [
-        f"Programming Language :: Python :: 3.{i}"
-        for i in range(8, version_range_max)
-    ],
+    + [f"Programming Language :: Python :: 3.{i}" for i in range(8, version_range_max)],
    cmdclass={"deps_table_update": DepsTableUpdateCommand},
 )

--- a/src/diffusers/configuration_utils.py
+++ b/src/diffusers/configuration_utils.py
@ -95,6 +95,7 @@ class ConfigMixin:
          should only have a `kwargs` argument if at least one argument is deprecated (should be overridden by
          subclass).
    """
+
    config_name = None
    ignore_for_config = []
    has_compatibles = False
--- a/src/diffusers/dependency_versions_table.py
+++ b/src/diffusers/dependency_versions_table.py
@ -5,7 +5,6 @@ deps = {
    "Pillow": "Pillow",
    "accelerate": "accelerate>=0.11.0",
    "compel": "compel==0.1.8",
-    "black": "black~=23.1",
    "datasets": "datasets",
    "filelock": "filelock",
    "flax": "flax>=0.4.1",
@ -31,7 +30,7 @@ deps = {
    "pytest-timeout": "pytest-timeout",
    "pytest-xdist": "pytest-xdist",
    "python": "python>=3.8.0",
-    "ruff": "ruff==0.0.280",
+    "ruff": "ruff>=0.1.5,<=0.2",
    "safetensors": "safetensors>=0.3.1",
    "sentencepiece": "sentencepiece>=0.1.91,!=0.1.92",
    "scipy": "scipy",
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@ -71,6 +71,7 @@ class LoraLoaderMixin:
    Load LoRA layers into [`UNet2DConditionModel`] and
    [`CLIPTextModel`](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel).
    """
+
    text_encoder_name = TEXT_ENCODER_NAME
    unet_name = UNET_NAME
    num_fused_loras = 0
--- a/src/diffusers/models/attention_flax.py
+++ b/src/diffusers/models/attention_flax.py
@ -110,7 +110,10 @@ def jax_memory_efficient_attention(
        )

    _, res = jax.lax.scan(
-        f=chunk_scanner, init=0, xs=None, length=math.ceil(num_q / query_chunk_size)  # start counter  # stop counter
+        f=chunk_scanner,
+        init=0,
+        xs=None,
+        length=math.ceil(num_q / query_chunk_size),  # start counter  # stop counter
    )

    return jnp.concatenate(res, axis=-3)  # fuse the chunked result back
@ -138,6 +141,7 @@ class FlaxAttention(nn.Module):
            Parameters `dtype`

    """
+
    query_dim: int
    heads: int = 8
    dim_head: int = 64
@ -262,6 +266,7 @@ class FlaxBasicTransformerBlock(nn.Module):
            Whether to split the head dimension into a new axis for the self-attention computation. In most cases,
            enabling this flag should speed up the computation for Stable Diffusion 2.x and Stable Diffusion XL.
    """
+
    dim: int
    n_heads: int
    d_head: int
@ -347,6 +352,7 @@ class FlaxTransformer2DModel(nn.Module):
            Whether to split the head dimension into a new axis for the self-attention computation. In most cases,
            enabling this flag should speed up the computation for Stable Diffusion 2.x and Stable Diffusion XL.
    """
+
    in_channels: int
    n_heads: int
    d_head: int
@ -442,6 +448,7 @@ class FlaxFeedForward(nn.Module):
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    """
+
    dim: int
    dropout: float = 0.0
    dtype: jnp.dtype = jnp.float32
@ -471,6 +478,7 @@ class FlaxGEGLU(nn.Module):
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    """
+
    dim: int
    dropout: float = 0.0
    dtype: jnp.dtype = jnp.float32
--- a/src/diffusers/models/autoencoder_tiny.py
+++ b/src/diffusers/models/autoencoder_tiny.py
@ -91,6 +91,7 @@ class AutoencoderTiny(ModelMixin, ConfigMixin):
            `force_upcast` can be set to `False` (see this fp16-friendly
            [AutoEncoder](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix)).
    """
+
    _supports_gradient_checkpointing = True

    @register_to_config
--- a/src/diffusers/models/controlnet_flax.py
+++ b/src/diffusers/models/controlnet_flax.py
@ -146,6 +146,7 @@ class FlaxControlNetModel(nn.Module, FlaxModelMixin, ConfigMixin):
        conditioning_embedding_out_channels (`tuple`, *optional*, defaults to `(16, 32, 96, 256)`):
            The tuple of output channel for each block in the `conditioning_embedding` layer.
    """
+
    sample_size: int = 32
    in_channels: int = 4
    down_block_types: Tuple[str, ...] = (
--- a/src/diffusers/models/embeddings_flax.py
+++ b/src/diffusers/models/embeddings_flax.py
@ -65,6 +65,7 @@ class FlaxTimestepEmbedding(nn.Module):
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
                Parameters `dtype`
    """
+
    time_embed_dim: int = 32
    dtype: jnp.dtype = jnp.float32

@ -84,6 +85,7 @@ class FlaxTimesteps(nn.Module):
        dim (`int`, *optional*, defaults to `32`):
                Time step embedding dimension
    """
+
    dim: int = 32
    flip_sin_to_cos: bool = False
    freq_shift: float = 1
--- a/src/diffusers/models/modeling_flax_utils.py
+++ b/src/diffusers/models/modeling_flax_utils.py
@ -52,6 +52,7 @@ class FlaxModelMixin(PushToHubMixin):

        - **config_name** ([`str`]) -- Filename to save a model to when calling [`~FlaxModelMixin.save_pretrained`].
    """
+
    config_name = CONFIG_NAME
    _automatically_saved_args = ["_diffusers_version", "_class_name", "_name_or_path"]
    _flax_internal_args = ["name", "parent", "dtype"]
--- a/src/diffusers/models/modeling_utils.py
+++ b/src/diffusers/models/modeling_utils.py
@ -193,6 +193,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):

        - **config_name** ([`str`]) -- Filename to save a model to when calling [`~models.ModelMixin.save_pretrained`].
    """
+
    config_name = CONFIG_NAME
    _automatically_saved_args = ["_diffusers_version", "_class_name", "_name_or_path"]
    _supports_gradient_checkpointing = False
--- a/src/diffusers/models/unet_2d_blocks_flax.py
+++ b/src/diffusers/models/unet_2d_blocks_flax.py
@ -45,6 +45,7 @@ class FlaxCrossAttnDownBlock2D(nn.Module):
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    """
+
    in_channels: int
    out_channels: int
    dropout: float = 0.0
@ -125,6 +126,7 @@ class FlaxDownBlock2D(nn.Module):
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    """
+
    in_channels: int
    out_channels: int
    dropout: float = 0.0
@ -190,6 +192,7 @@ class FlaxCrossAttnUpBlock2D(nn.Module):
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    """
+
    in_channels: int
    out_channels: int
    prev_output_channel: int
@ -275,6 +278,7 @@ class FlaxUpBlock2D(nn.Module):
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    """
+
    in_channels: int
    out_channels: int
    prev_output_channel: int
@ -339,6 +343,7 @@ class FlaxUNetMidBlock2DCrossAttn(nn.Module):
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    """
+
    in_channels: int
    dropout: float = 0.0
    num_layers: int = 1
--- a/src/diffusers/models/unet_motion_model.py
+++ b/src/diffusers/models/unet_motion_model.py
@ -174,6 +174,7 @@ class UNetMotionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin):
    This model inherits from [`ModelMixin`]. Check the superclass documentation for it's generic methods implemented
    for all models (such as downloading or saving).
    """
+
    _supports_gradient_checkpointing = True

    @register_to_config
--- a/src/diffusers/models/vae_flax.py
+++ b/src/diffusers/models/vae_flax.py
@ -214,6 +214,7 @@ class FlaxAttentionBlock(nn.Module):
            Parameters `dtype`

    """
+
    channels: int
    num_head_channels: int = None
    num_groups: int = 32
@ -291,6 +292,7 @@ class FlaxDownEncoderBlock2D(nn.Module):
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    """
+
    in_channels: int
    out_channels: int
    dropout: float = 0.0
@ -347,6 +349,7 @@ class FlaxUpDecoderBlock2D(nn.Module):
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    """
+
    in_channels: int
    out_channels: int
    dropout: float = 0.0
@ -401,6 +404,7 @@ class FlaxUNetMidBlock2D(nn.Module):
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    """
+
    in_channels: int
    dropout: float = 0.0
    num_layers: int = 1
@ -488,6 +492,7 @@ class FlaxEncoder(nn.Module):
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            Parameters `dtype`
    """
+
    in_channels: int = 3
    out_channels: int = 3
    down_block_types: Tuple[str] = ("DownEncoderBlock2D",)
@ -600,6 +605,7 @@ class FlaxDecoder(nn.Module):
        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
            parameters `dtype`
    """
+
    in_channels: int = 3
    out_channels: int = 3
    up_block_types: Tuple[str] = ("UpDecoderBlock2D",)
@ -767,6 +773,7 @@ class FlaxAutoencoderKL(nn.Module, FlaxModelMixin, ConfigMixin):
        dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`):
            The `dtype` of the parameters.
    """
+
    in_channels: int = 3
    out_channels: int = 3
    down_block_types: Tuple[str] = ("DownEncoderBlock2D",)
--- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py
+++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py
@ -243,10 +243,7 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
        lora_scale: Optional[float] = None,
        **kwargs,
    ):
-        deprecation_message = (
-            "`_encode_prompt()` is deprecated and it will be removed in a future version. Use `encode_prompt()`"
-            " instead. Also, be aware that the output format changed from a concatenated tensor to a tuple."
-        )
+        deprecation_message = "`_encode_prompt()` is deprecated and it will be removed in a future version. Use `encode_prompt()` instead. Also, be aware that the output format changed from a concatenated tensor to a tuple."
        deprecate("_encode_prompt()", "1.0.0", deprecation_message, standard_warn=False)

        prompt_embeds_tuple = self.encode_prompt(
@ -462,10 +459,7 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
        return image, has_nsfw_concept

    def decode_latents(self, latents):
-        deprecation_message = (
-            "The decode_latents method is deprecated and will be removed in 1.0.0. Please use"
-            " VaeImageProcessor.postprocess(...) instead"
-        )
+        deprecation_message = "The decode_latents method is deprecated and will be removed in 1.0.0. Please use VaeImageProcessor.postprocess(...) instead"
        deprecate("decode_latents", "1.0.0", deprecation_message, standard_warn=False)

        latents = 1 / self.vae.config.scaling_factor * latents
@ -515,8 +509,7 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
            k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
        ):
            raise ValueError(
-                f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found"
-                f" {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
+                f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
            )

        if prompt is not None and prompt_embeds is not None:
@ -747,15 +740,13 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
            deprecate(
                "callback",
                "1.0.0",
-                "Passing `callback` as an input argument to `__call__` is deprecated, consider using"
-                " `callback_on_step_end`",
+                "Passing `callback` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
            )
        if callback_steps is not None:
            deprecate(
                "callback_steps",
                "1.0.0",
-                "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using"
-                " `callback_on_step_end`",
+                "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
            )

        # 0. Default height and width to unet
--- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
+++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
@ -252,10 +252,7 @@ class AltDiffusionImg2ImgPipeline(
        lora_scale: Optional[float] = None,
        **kwargs,
    ):
-        deprecation_message = (
-            "`_encode_prompt()` is deprecated and it will be removed in a future version. Use `encode_prompt()`"
-            " instead. Also, be aware that the output format changed from a concatenated tensor to a tuple."
-        )
+        deprecation_message = "`_encode_prompt()` is deprecated and it will be removed in a future version. Use `encode_prompt()` instead. Also, be aware that the output format changed from a concatenated tensor to a tuple."
        deprecate("_encode_prompt()", "1.0.0", deprecation_message, standard_warn=False)

        prompt_embeds_tuple = self.encode_prompt(
@ -471,10 +468,7 @@ class AltDiffusionImg2ImgPipeline(
        return image, has_nsfw_concept

    def decode_latents(self, latents):
-        deprecation_message = (
-            "The decode_latents method is deprecated and will be removed in 1.0.0. Please use"
-            " VaeImageProcessor.postprocess(...) instead"
-        )
+        deprecation_message = "The decode_latents method is deprecated and will be removed in 1.0.0. Please use VaeImageProcessor.postprocess(...) instead"
        deprecate("decode_latents", "1.0.0", deprecation_message, standard_warn=False)

        latents = 1 / self.vae.config.scaling_factor * latents
@ -524,8 +518,7 @@ class AltDiffusionImg2ImgPipeline(
            k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
        ):
            raise ValueError(
-                f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found"
-                f" {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
+                f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
            )
        if prompt is not None and prompt_embeds is not None:
            raise ValueError(
@ -578,8 +571,8 @@ class AltDiffusionImg2ImgPipeline(
        else:
            if isinstance(generator, list) and len(generator) != batch_size:
                raise ValueError(
-                    f"You have passed a list of generators of length {len(generator)}, but requested an effective"
-                    f" batch size of {batch_size}. Make sure the batch size matches the length of the generators."
+                    f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
+                    f" size of {batch_size}. Make sure the batch size matches the length of the generators."
                )

            elif isinstance(generator, list):
@ -798,15 +791,13 @@ class AltDiffusionImg2ImgPipeline(
            deprecate(
                "callback",
                "1.0.0",
-                "Passing `callback` as an input argument to `__call__` is deprecated, consider use"
-                " `callback_on_step_end`",
+                "Passing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
            )
        if callback_steps is not None:
            deprecate(
                "callback_steps",
                "1.0.0",
-                "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use"
-                " `callback_on_step_end`",
+                "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
            )

        # 1. Check inputs. Raise error if not correct
--- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
+++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
@ -99,6 +99,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLo
            A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
            [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"

    def __init__(
--- a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py
+++ b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py
@ -72,6 +72,7 @@ class AudioLDMPipeline(DiffusionPipeline):
        vocoder ([`~transformers.SpeechT5HifiGan`]):
            Vocoder of class `SpeechT5HifiGan`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"

    def __init__(
--- a/src/diffusers/pipelines/auto_pipeline.py
+++ b/src/diffusers/pipelines/auto_pipeline.py
@ -181,6 +181,7 @@ class AutoPipelineForText2Image(ConfigMixin):
          diffusion pipeline's components.

    """
+
    config_name = "model_index.json"

    def __init__(self, *args, **kwargs):
@ -451,6 +452,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
          diffusion pipeline's components.

    """
+
    config_name = "model_index.json"

    def __init__(self, *args, **kwargs):
@ -726,6 +728,7 @@ class AutoPipelineForInpainting(ConfigMixin):
          diffusion pipeline's components.

    """
+
    config_name = "model_index.json"

    def __init__(self, *args, **kwargs):
--- a/src/diffusers/pipelines/consistency_models/pipeline_consistency_models.py
+++ b/src/diffusers/pipelines/consistency_models/pipeline_consistency_models.py
@ -74,6 +74,7 @@ class ConsistencyModelPipeline(DiffusionPipeline):
            A scheduler to be used in combination with `unet` to denoise the encoded image latents. Currently only
            compatible with [`CMStochasticIterativeScheduler`].
    """
+
    model_cpu_offload_seq = "unet"

    def __init__(self, unet: UNet2DModel, scheduler: CMStochasticIterativeScheduler) -> None:
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
@ -126,6 +126,7 @@ class StableDiffusionControlNetPipeline(
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
@ -886,9 +887,10 @@ class StableDiffusionControlNetPipeline(
            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
-            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
-                control_guidance_end
-            ]
+            control_guidance_start, control_guidance_end = (
+                mult * [control_guidance_start],
+                mult * [control_guidance_end],
+            )

        # 1. Check inputs. Raise error if not correct
        self.check_inputs(
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
@ -160,6 +160,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
@ -933,9 +934,10 @@ class StableDiffusionControlNetImg2ImgPipeline(
            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
-            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
-                control_guidance_end
-            ]
+            control_guidance_start, control_guidance_end = (
+                mult * [control_guidance_start],
+                mult * [control_guidance_end],
+            )

        # 1. Check inputs. Raise error if not correct
        self.check_inputs(
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
@ -282,6 +282,7 @@ class StableDiffusionControlNetInpaintPipeline(
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
@ -1142,9 +1143,10 @@ class StableDiffusionControlNetInpaintPipeline(
            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
-            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
-                control_guidance_end
-            ]
+            control_guidance_start, control_guidance_end = (
+                mult * [control_guidance_start],
+                mult * [control_guidance_end],
+            )

        # 1. Check inputs. Raise error if not correct
        self.check_inputs(
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
@ -164,6 +164,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
            A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
            [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
    """
+
    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
    _optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]

@ -1198,9 +1199,10 @@ class StableDiffusionXLControlNetInpaintPipeline(
            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
-            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
-                control_guidance_end
-            ]
+            control_guidance_start, control_guidance_end = (
+                mult * [control_guidance_start],
+                mult * [control_guidance_end],
+            )

        # # 0.0 Default height and width to unet
        # height = height or self.unet.config.sample_size * self.vae_scale_factor
@ -1213,9 +1215,10 @@ class StableDiffusionXLControlNetInpaintPipeline(
            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
-            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
-                control_guidance_end
-            ]
+            control_guidance_start, control_guidance_end = (
+                mult * [control_guidance_start],
+                mult * [control_guidance_end],
+            )

        # 1. Check inputs
        self.check_inputs(
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
@ -139,6 +139,7 @@ class StableDiffusionXLControlNetPipeline(
            watermark output images. If not defined, it defaults to `True` if the package is installed; otherwise no
            watermarker is used.
    """
+
    # leave controlnet out on purpose because it iterates with unet
    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
    _optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
@ -1005,9 +1006,10 @@ class StableDiffusionXLControlNetPipeline(
            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
-            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
-                control_guidance_end
-            ]
+            control_guidance_start, control_guidance_end = (
+                mult * [control_guidance_start],
+                mult * [control_guidance_end],
+            )

        # 1. Check inputs. Raise error if not correct
        self.check_inputs(
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
@ -192,6 +192,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
            watermark output images. If not defined, it will default to True if the package is installed, otherwise no
            watermarker will be used.
    """
+
    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
    _optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]

@ -1154,9 +1155,10 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
-            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
-                control_guidance_end
-            ]
+            control_guidance_start, control_guidance_end = (
+                mult * [control_guidance_start],
+                mult * [control_guidance_end],
+            )

        # 1. Check inputs. Raise error if not correct
        self.check_inputs(
--- a/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py
+++ b/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py
@ -39,6 +39,7 @@ class DanceDiffusionPipeline(DiffusionPipeline):
            A scheduler to be used in combination with `unet` to denoise the encoded audio latents. Can be one of
            [`IPNDMScheduler`].
    """
+
    model_cpu_offload_seq = "unet"

    def __init__(self, unet, scheduler):
--- a/src/diffusers/pipelines/ddim/pipeline_ddim.py
+++ b/src/diffusers/pipelines/ddim/pipeline_ddim.py
@ -35,6 +35,7 @@ class DDIMPipeline(DiffusionPipeline):
            A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of
            [`DDPMScheduler`], or [`DDIMScheduler`].
    """
+
    model_cpu_offload_seq = "unet"

    def __init__(self, unet, scheduler):
--- a/src/diffusers/pipelines/ddpm/pipeline_ddpm.py
+++ b/src/diffusers/pipelines/ddpm/pipeline_ddpm.py
@ -35,6 +35,7 @@ class DDPMPipeline(DiffusionPipeline):
            A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of
            [`DDPMScheduler`], or [`DDIMScheduler`].
    """
+
    model_cpu_offload_seq = "unet"

    def __init__(self, unet, scheduler):
--- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if.py
+++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if.py
@ -98,7 +98,19 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
    watermarker: Optional[IFWatermarker]

    bad_punct_regex = re.compile(
-        r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
+        r"["
+        + "#®•©™&@·º½¾¿¡§~"
+        + r"\)"
+        + r"\("
+        + r"\]"
+        + r"\["
+        + r"\}"
+        + r"\{"
+        + r"\|"
+        + "\\"
+        + r"\/"
+        + r"\*"
+        + r"]{1,}"
    )  # noqa

    _optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
--- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py
+++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py
@ -122,7 +122,19 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
    watermarker: Optional[IFWatermarker]

    bad_punct_regex = re.compile(
-        r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
+        r"["
+        + "#®•©™&@·º½¾¿¡§~"
+        + r"\)"
+        + r"\("
+        + r"\]"
+        + r"\["
+        + r"\}"
+        + r"\{"
+        + r"\|"
+        + "\\"
+        + r"\/"
+        + r"\*"
+        + r"]{1,}"
    )  # noqa

    _optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
--- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py
+++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py
@ -126,7 +126,19 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
    watermarker: Optional[IFWatermarker]

    bad_punct_regex = re.compile(
-        r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
+        r"["
+        + "#®•©™&@·º½¾¿¡§~"
+        + r"\)"
+        + r"\("
+        + r"\]"
+        + r"\["
+        + r"\}"
+        + r"\{"
+        + r"\|"
+        + "\\"
+        + r"\/"
+        + r"\*"
+        + r"]{1,}"
    )  # noqa

    _optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor"]
--- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py
+++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py
@ -125,7 +125,19 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
    watermarker: Optional[IFWatermarker]

    bad_punct_regex = re.compile(
-        r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
+        r"["
+        + "#®•©™&@·º½¾¿¡§~"
+        + r"\)"
+        + r"\("
+        + r"\]"
+        + r"\["
+        + r"\}"
+        + r"\{"
+        + r"\|"
+        + "\\"
+        + r"\/"
+        + r"\*"
+        + r"]{1,}"
    )  # noqa

    _optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
--- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py
+++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py
@ -128,7 +128,19 @@ class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
    watermarker: Optional[IFWatermarker]

    bad_punct_regex = re.compile(
-        r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
+        r"["
+        + "#®•©™&@·º½¾¿¡§~"
+        + r"\)"
+        + r"\("
+        + r"\]"
+        + r"\["
+        + r"\}"
+        + r"\{"
+        + r"\|"
+        + "\\"
+        + r"\/"
+        + r"\*"
+        + r"]{1,}"
    )  # noqa

    model_cpu_offload_seq = "text_encoder->unet"
--- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py
+++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py
@ -84,7 +84,19 @@ class IFSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
    watermarker: Optional[IFWatermarker]

    bad_punct_regex = re.compile(
-        r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
+        r"["
+        + "#®•©™&@·º½¾¿¡§~"
+        + r"\)"
+        + r"\("
+        + r"\]"
+        + r"\["
+        + r"\}"
+        + r"\{"
+        + r"\|"
+        + "\\"
+        + r"\/"
+        + r"\*"
+        + r"]{1,}"
    )  # noqa

    _optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
--- a/src/diffusers/pipelines/dit/pipeline_dit.py
+++ b/src/diffusers/pipelines/dit/pipeline_dit.py
@ -43,6 +43,7 @@ class DiTPipeline(DiffusionPipeline):
        scheduler ([`DDIMScheduler`]):
            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
    """
+
    model_cpu_offload_seq = "transformer->vae"

    def __init__(
--- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py
+++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py
@ -115,6 +115,7 @@ class LatentConsistencyModelImg2ImgPipeline(
        requires_safety_checker (`bool`, *optional*, defaults to `True`):
            Whether the pipeline requires a safety checker component.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
+++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
@ -97,6 +97,7 @@ class LatentConsistencyModelPipeline(
        requires_safety_checker (`bool`, *optional*, defaults to `True`):
            Whether the pipeline requires a safety checker component.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py
+++ b/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py
@ -49,6 +49,7 @@ class LDMTextToImagePipeline(DiffusionPipeline):
            A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
            [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
    """
+
    model_cpu_offload_seq = "bert->unet->vqvae"

    def __init__(
--- a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py
+++ b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py
@ -177,6 +177,7 @@ class PaintByExamplePipeline(DiffusionPipeline):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.

    """
+
    # TODO: feature_extractor is required to encode initial images (if they are in PIL format),
    # we should give a descriptive message if the pipeline doesn't have one.

--- a/src/diffusers/pipelines/pipeline_flax_utils.py
+++ b/src/diffusers/pipelines/pipeline_flax_utils.py
@ -112,6 +112,7 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
        - **config_name** ([`str`]) -- The configuration filename that stores the class and module names of all the
          diffusion pipeline's components.
    """
+
    config_name = "model_index.json"

    def register_modules(self, **kwargs):
--- a/src/diffusers/pipelines/pipeline_utils.py
+++ b/src/diffusers/pipelines/pipeline_utils.py
@ -542,6 +542,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
        - **_optional_components** (`List[str]`) -- List of all optional components that don't have to be passed to the
          pipeline to function (should be overridden by subclasses).
    """
+
    config_name = "model_index.json"
    model_cpu_offload_seq = None
    _optional_components = []
--- a/src/diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py
+++ b/src/diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py
@ -120,8 +120,21 @@ class PixArtAlphaPipeline(DiffusionPipeline):
        scheduler ([`SchedulerMixin`]):
            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
    """
+
    bad_punct_regex = re.compile(
-        r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
+        r"["
+        + "#®•©™&@·º½¾¿¡§~"
+        + r"\)"
+        + r"\("
+        + r"\]"
+        + r"\["
+        + r"\}"
+        + r"\{"
+        + r"\|"
+        + "\\"
+        + r"\/"
+        + r"\*"
+        + r"]{1,}"
    )  # noqa

    _optional_components = ["tokenizer", "text_encoder"]
--- a/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py
+++ b/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py
@ -35,6 +35,7 @@ class ScoreSdeVePipeline(DiffusionPipeline):
        scheduler ([`ScoreSdeVeScheduler`]):
            A `ScoreSdeVeScheduler` to be used in combination with `unet` to denoise the encoded image.
    """
+
    unet: UNet2DModel
    scheduler: ScoreSdeVeScheduler

--- a/src/diffusers/pipelines/spectrogram_diffusion/pipeline_spectrogram_diffusion.py
+++ b/src/diffusers/pipelines/spectrogram_diffusion/pipeline_spectrogram_diffusion.py
@ -54,6 +54,7 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline):
            A scheduler to be used in combination with `decoder` to denoise the encoded audio latents.
        melgan ([`OnnxRuntimeModel`]):
    """
+
    _optional_components = ["melgan"]

    def __init__(
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py
@ -148,6 +148,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]

--- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py
@ -33,10 +33,7 @@ logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess with 8->64
 def preprocess(image):
-    deprecation_message = (
-        "The preprocess method is deprecated and will be removed in diffusers 1.0.0. Please use"
-        " VaeImageProcessor.preprocess(...) instead"
-    )
+    deprecation_message = "The preprocess method is deprecated and will be removed in diffusers 1.0.0. Please use VaeImageProcessor.preprocess(...) instead"
    deprecate("preprocess", "1.0.0", deprecation_message, standard_warn=False)
    if isinstance(image, torch.Tensor):
        return image
@ -85,6 +82,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
        feature_extractor ([`CLIPImageProcessor`]):
            Model that extracts features from generated images to be used as inputs for the `safety_checker`.
    """
+
    vae_encoder: OnnxRuntimeModel
    vae_decoder: OnnxRuntimeModel
    text_encoder: OnnxRuntimeModel
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py
@ -80,6 +80,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
        feature_extractor ([`CLIPImageProcessor`]):
            Model that extracts features from generated images to be used as inputs for the `safety_checker`.
    """
+
    vae_encoder: OnnxRuntimeModel
    vae_decoder: OnnxRuntimeModel
    text_encoder: OnnxRuntimeModel
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint_legacy.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint_legacy.py
@ -66,6 +66,7 @@ class OnnxStableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
        feature_extractor ([`CLIPImageProcessor`]):
            Model that extracts features from generated images to be used as inputs for the `safety_checker`.
    """
+
    _optional_components = ["safety_checker", "feature_extractor"]
    _is_onnx = True

--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@ -102,6 +102,7 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py
@ -196,6 +196,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, TextualInversion
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py
@ -95,6 +95,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
            A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
            [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds", "depth_mask"]

--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py
@ -273,6 +273,7 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor", "inverse_scheduler"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py
@ -125,6 +125,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline):
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    _optional_components = ["safety_checker", "feature_extractor"]
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py
@ -177,6 +177,7 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline):
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
@ -62,6 +62,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    # TODO: feature_extractor is required to encode images (if they are in PIL format),
    # we should give a descriptive message if the pipeline doesn't have one.
    _optional_components = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
@ -139,6 +139,7 @@ class StableDiffusionImg2ImgPipeline(
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
@ -202,6 +202,7 @@ class StableDiffusionInpaintPipeline(
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py
@ -115,6 +115,7 @@ class StableDiffusionInpaintPipelineLegacy(
        feature_extractor ([`CLIPImageProcessor`]):
            Model that extracts features from generated images to be used as inputs for the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py
@ -89,6 +89,7 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py
@ -80,6 +80,7 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoade
        feature_extractor ([`CLIPImageProcessor`]):
            Model that extracts features from generated images to be used as inputs for the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py
@ -79,6 +79,7 @@ class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, FromSingleFileMixi
        scheduler ([`SchedulerMixin`]):
            A [`EulerDiscreteScheduler`] to be used in combination with `unet` to denoise the encoded image latents.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"

    def __init__(
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py
@ -115,6 +115,7 @@ class StableDiffusionLDM3DPipeline(
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py
@ -66,6 +66,7 @@ class StableDiffusionModelEditingPipeline(DiffusionPipeline, TextualInversionLoa
        with_augs ([`list`]):
            Textual augmentations to apply while editing the text-to-image model. Set to `[]` for no augmentations.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py
@ -85,6 +85,7 @@ class StableDiffusionPanoramaPipeline(DiffusionPipeline, TextualInversionLoaderM
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py
@ -96,6 +96,7 @@ class StableDiffusionParadigmsPipeline(
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py
@ -310,6 +310,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
            Whether the pipeline requires a safety checker. We recommend setting it to True if you're using the
            pipeline publicly.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = [
        "safety_checker",
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py
@ -124,6 +124,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, TextualInversionLoaderMixin)
        feature_extractor ([`~transformers.CLIPImageProcessor`]):
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
@ -92,6 +92,7 @@ class StableDiffusionUpscalePipeline(
            A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
            [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"
    _optional_components = ["watermarker", "safety_checker", "feature_extractor"]
    _exclude_from_cpu_offload = ["safety_checker"]
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
@ -140,6 +140,7 @@ class StableDiffusionXLPipeline(
            watermark output images. If not defined, it will default to True if the package is installed, otherwise no
            watermarker will be used.
    """
+
    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
    _optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
    _callback_tensor_inputs = [
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
@ -153,6 +153,7 @@ class StableDiffusionXLImg2ImgPipeline(
            watermark output images. If not defined, it will default to True if the package is installed, otherwise no
            watermarker will be used.
    """
+
    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
    _optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
    _callback_tensor_inputs = [
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
@ -298,6 +298,7 @@ class StableDiffusionXLInpaintPipeline(
            watermark output images. If not defined, it will default to True if the package is installed, otherwise no
            watermarker will be used.
    """
+
    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"

    _optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py
@ -151,6 +151,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
            watermark output images. If not defined, it will default to True if the package is installed, otherwise no
            watermarker will be used.
    """
+
    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
    _optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]

--- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py
+++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py
@ -152,6 +152,7 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline):
        feature_extractor ([`CLIPFeatureExtractor`]):
            Model that extracts features from generated images to be used as inputs for the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->adapter->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor"]

--- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
+++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
@ -159,6 +159,7 @@ class StableDiffusionXLAdapterPipeline(
        feature_extractor ([`CLIPFeatureExtractor`]):
            Model that extracts features from generated images to be used as inputs for the `safety_checker`.
    """
+
    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
    _optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]

--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
@ -96,6 +96,7 @@ class TextToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lora
            A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
            [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"

    def __init__(
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
@ -158,6 +158,7 @@ class VideoToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
            A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
            [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
    """
+
    model_cpu_offload_seq = "text_encoder->unet->vae"

    def __init__(
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py
@ -183,6 +183,7 @@ class TextToVideoPipelineOutput(BaseOutput):
            List indicating whether the corresponding generated image contains "not-safe-for-work" (nsfw) content or
            `None` if safety checking could not be performed.
    """
+
    images: Union[List[PIL.Image.Image], np.ndarray]
    nsfw_content_detected: Optional[List[bool]]

--- a/Show More
+++ b/Show More