[Styling] stylify using ruff (#5841)
* ruff format * no need to use doc-builder's black styling as the doc is styled in ruff * make fix-copies * comment * use run_ruff
This commit is contained in:
parent
9c7f7fc475
commit
6b04d61cf6
5
.github/workflows/pr_quality.yml
vendored
5
.github/workflows/pr_quality.yml
vendored
@ -27,9 +27,8 @@ jobs:
|
||||
pip install .[quality]
|
||||
- name: Check quality
|
||||
run: |
|
||||
black --check examples tests src utils scripts
|
||||
ruff examples tests src utils scripts
|
||||
doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source
|
||||
ruff check examples tests src utils scripts
|
||||
ruff format examples tests src utils scripts --check
|
||||
|
||||
check_repository_consistency:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@ -410,7 +410,7 @@ Diffusers has grown a lot. Here is the command for it:
|
||||
$ make test
|
||||
```
|
||||
|
||||
🧨 Diffusers relies on `black` and `isort` to format its source code
|
||||
🧨 Diffusers relies on `ruff` and `isort` to format its source code
|
||||
consistently. After you make changes, apply automatic style corrections and code verifications
|
||||
that can't be automated in one go with:
|
||||
|
||||
|
||||
14
Makefile
14
Makefile
@ -9,8 +9,8 @@ modified_only_fixup:
|
||||
$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
|
||||
@if test -n "$(modified_py_files)"; then \
|
||||
echo "Checking/fixing $(modified_py_files)"; \
|
||||
black $(modified_py_files); \
|
||||
ruff $(modified_py_files); \
|
||||
ruff check $(modified_py_files) --fix; \
|
||||
ruff format $(modified_py_files);\
|
||||
else \
|
||||
echo "No library .py files were modified"; \
|
||||
fi
|
||||
@ -40,23 +40,21 @@ repo-consistency:
|
||||
# this target runs checks on all files
|
||||
|
||||
quality:
|
||||
black --check $(check_dirs)
|
||||
ruff $(check_dirs)
|
||||
doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source
|
||||
ruff check $(check_dirs) setup.py
|
||||
ruff format --check $(check_dirs) setup.py
|
||||
python utils/check_doc_toc.py
|
||||
|
||||
# Format source code automatically and check if there are any problems left that need manual fixing
|
||||
|
||||
extra_style_checks:
|
||||
python utils/custom_init_isort.py
|
||||
doc-builder style src/diffusers docs/source --max_len 119 --path_to_docs docs/source
|
||||
python utils/check_doc_toc.py --fix_and_overwrite
|
||||
|
||||
# this target runs checks on all files and potentially modifies some of them
|
||||
|
||||
style:
|
||||
black $(check_dirs)
|
||||
ruff $(check_dirs) --fix
|
||||
ruff check $(check_dirs) setup.py --fix
|
||||
ruff format $(check_dirs) setup.py
|
||||
${MAKE} autogenerate_code
|
||||
${MAKE} extra_style_checks
|
||||
|
||||
|
||||
@ -65,6 +65,7 @@ class ComposableStableDiffusionPipeline(DiffusionPipeline):
|
||||
feature_extractor ([`CLIPImageProcessor`]):
|
||||
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
|
||||
"""
|
||||
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
|
||||
def __init__(
|
||||
|
||||
@ -564,9 +564,7 @@ class LCMSchedulerWithTimestamp(SchedulerMixin, ConfigMixin):
|
||||
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
||||
elif beta_schedule == "scaled_linear":
|
||||
# this schedule is very specific to the latent diffusion model.
|
||||
self.betas = (
|
||||
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
||||
)
|
||||
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
||||
elif beta_schedule == "squaredcos_cap_v2":
|
||||
# Glide cosine schedule
|
||||
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
||||
|
||||
@ -469,9 +469,7 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
||||
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
|
||||
elif beta_schedule == "scaled_linear":
|
||||
# this schedule is very specific to the latent diffusion model.
|
||||
self.betas = (
|
||||
torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
||||
)
|
||||
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
|
||||
elif beta_schedule == "squaredcos_cap_v2":
|
||||
# Glide cosine schedule
|
||||
self.betas = betas_for_alpha_bar(num_train_timesteps)
|
||||
|
||||
@ -56,10 +56,10 @@ def parse_prompt_attention(text):
|
||||
(abc) - increases attention to abc by a multiplier of 1.1
|
||||
(abc:3.12) - increases attention to abc by a multiplier of 3.12
|
||||
[abc] - decreases attention to abc by a multiplier of 1.1
|
||||
\( - literal character '('
|
||||
\[ - literal character '['
|
||||
\) - literal character ')'
|
||||
\] - literal character ']'
|
||||
\\( - literal character '('
|
||||
\\[ - literal character '['
|
||||
\\) - literal character ')'
|
||||
\\] - literal character ']'
|
||||
\\ - literal character '\'
|
||||
anything else - just text
|
||||
>>> parse_prompt_attention('normal text')
|
||||
@ -68,7 +68,7 @@ def parse_prompt_attention(text):
|
||||
[['an ', 1.0], ['important', 1.1], [' word', 1.0]]
|
||||
>>> parse_prompt_attention('(unbalanced')
|
||||
[['unbalanced', 1.1]]
|
||||
>>> parse_prompt_attention('\(literal\]')
|
||||
>>> parse_prompt_attention('\\(literal\\]')
|
||||
[['(literal]', 1.0]]
|
||||
>>> parse_prompt_attention('(unnecessary)(parens)')
|
||||
[['unnecessaryparens', 1.1]]
|
||||
|
||||
@ -82,10 +82,10 @@ def parse_prompt_attention(text):
|
||||
(abc) - increases attention to abc by a multiplier of 1.1
|
||||
(abc:3.12) - increases attention to abc by a multiplier of 3.12
|
||||
[abc] - decreases attention to abc by a multiplier of 1.1
|
||||
\( - literal character '('
|
||||
\[ - literal character '['
|
||||
\) - literal character ')'
|
||||
\] - literal character ']'
|
||||
\\( - literal character '('
|
||||
\\[ - literal character '['
|
||||
\\) - literal character ')'
|
||||
\\] - literal character ']'
|
||||
\\ - literal character '\'
|
||||
anything else - just text
|
||||
>>> parse_prompt_attention('normal text')
|
||||
@ -94,7 +94,7 @@ def parse_prompt_attention(text):
|
||||
[['an ', 1.0], ['important', 1.1], [' word', 1.0]]
|
||||
>>> parse_prompt_attention('(unbalanced')
|
||||
[['unbalanced', 1.1]]
|
||||
>>> parse_prompt_attention('\(literal\]')
|
||||
>>> parse_prompt_attention('\\(literal\\]')
|
||||
[['(literal]', 1.0]]
|
||||
>>> parse_prompt_attention('(unnecessary)(parens)')
|
||||
[['unnecessaryparens', 1.1]]
|
||||
@ -433,6 +433,7 @@ class OnnxStableDiffusionLongPromptWeightingPipeline(OnnxStableDiffusionPipeline
|
||||
This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
|
||||
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
|
||||
"""
|
||||
|
||||
if version.parse(version.parse(diffusers.__version__).base_version) >= version.parse("0.9.0"):
|
||||
|
||||
def __init__(
|
||||
|
||||
@ -46,10 +46,10 @@ def parse_prompt_attention(text):
|
||||
(abc) - increases attention to abc by a multiplier of 1.1
|
||||
(abc:3.12) - increases attention to abc by a multiplier of 3.12
|
||||
[abc] - decreases attention to abc by a multiplier of 1.1
|
||||
\( - literal character '('
|
||||
\[ - literal character '['
|
||||
\) - literal character ')'
|
||||
\] - literal character ']'
|
||||
\\( - literal character '('
|
||||
\\[ - literal character '['
|
||||
\\) - literal character ')'
|
||||
\\] - literal character ']'
|
||||
\\ - literal character '\'
|
||||
anything else - just text
|
||||
|
||||
@ -59,7 +59,7 @@ def parse_prompt_attention(text):
|
||||
[['an ', 1.0], ['important', 1.1], [' word', 1.0]]
|
||||
>>> parse_prompt_attention('(unbalanced')
|
||||
[['unbalanced', 1.1]]
|
||||
>>> parse_prompt_attention('\(literal\]')
|
||||
>>> parse_prompt_attention('\\(literal\\]')
|
||||
[['(literal]', 1.0]]
|
||||
>>> parse_prompt_attention('(unnecessary)(parens)')
|
||||
[['unnecessaryparens', 1.1]]
|
||||
|
||||
@ -127,9 +127,9 @@ class MagicMixPipeline(DiffusionPipeline):
|
||||
timesteps=t,
|
||||
)
|
||||
|
||||
input = (mix_factor * latents) + (
|
||||
1 - mix_factor
|
||||
) * orig_latents # interpolating between layout noise and conditionally generated noise to preserve layout sematics
|
||||
input = (
|
||||
(mix_factor * latents) + (1 - mix_factor) * orig_latents
|
||||
) # interpolating between layout noise and conditionally generated noise to preserve layout sematics
|
||||
input = torch.cat([input] * 2)
|
||||
|
||||
else: # content generation phase
|
||||
|
||||
@ -453,9 +453,7 @@ class StableDiffusionCanvasPipeline(DiffusionPipeline):
|
||||
:,
|
||||
region.latent_row_init : region.latent_row_end,
|
||||
region.latent_col_init : region.latent_col_end,
|
||||
] += (
|
||||
noise_pred_region * mask_weights_region
|
||||
)
|
||||
] += noise_pred_region * mask_weights_region
|
||||
contributors[
|
||||
:,
|
||||
:,
|
||||
|
||||
@ -65,6 +65,7 @@ class Prompt2PromptPipeline(StableDiffusionPipeline):
|
||||
feature_extractor ([`CLIPFeatureExtractor`]):
|
||||
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
|
||||
"""
|
||||
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
|
||||
@torch.no_grad()
|
||||
|
||||
@ -94,6 +94,7 @@ class Zero1to3StableDiffusionPipeline(DiffusionPipeline):
|
||||
cc_projection ([`CCProjection`]):
|
||||
Projection layer to project the concatenated CLIP features and pose embeddings to the original CLIP feature size.
|
||||
"""
|
||||
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
|
||||
def __init__(
|
||||
@ -658,7 +659,8 @@ class Zero1to3StableDiffusionPipeline(DiffusionPipeline):
|
||||
|
||||
if isinstance(generator, list):
|
||||
init_latents = [
|
||||
self.vae.encode(image[i : i + 1]).latent_dist.mode(generator[i]) for i in range(batch_size) # sample
|
||||
self.vae.encode(image[i : i + 1]).latent_dist.mode(generator[i])
|
||||
for i in range(batch_size) # sample
|
||||
]
|
||||
init_latents = torch.cat(init_latents, dim=0)
|
||||
else:
|
||||
|
||||
@ -651,9 +651,10 @@ class OnnxStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
|
||||
control_guidance_end = len(control_guidance_start) * [control_guidance_end]
|
||||
elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
|
||||
mult = num_controlnet
|
||||
control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
|
||||
control_guidance_end
|
||||
]
|
||||
control_guidance_start, control_guidance_end = (
|
||||
mult * [control_guidance_start],
|
||||
mult * [control_guidance_end],
|
||||
)
|
||||
|
||||
# 1. Check inputs. Raise error if not correct
|
||||
self.check_inputs(
|
||||
|
||||
@ -755,9 +755,10 @@ class TensorRTStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
|
||||
control_guidance_end = len(control_guidance_start) * [control_guidance_end]
|
||||
elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
|
||||
mult = num_controlnet
|
||||
control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
|
||||
control_guidance_end
|
||||
]
|
||||
control_guidance_start, control_guidance_end = (
|
||||
mult * [control_guidance_start],
|
||||
mult * [control_guidance_end],
|
||||
)
|
||||
|
||||
# 1. Check inputs. Raise error if not correct
|
||||
self.check_inputs(
|
||||
|
||||
@ -68,6 +68,7 @@ class StableDiffusionPipeline(DiffusionPipeline):
|
||||
feature_extractor ([`CLIPImageProcessor`]):
|
||||
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
|
||||
"""
|
||||
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
|
||||
def __init__(
|
||||
|
||||
@ -89,6 +89,7 @@ class StableDiffusionIPEXPipeline(DiffusionPipeline, TextualInversionLoaderMixin
|
||||
feature_extractor ([`CLIPFeatureExtractor`]):
|
||||
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
|
||||
"""
|
||||
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
|
||||
def __init__(
|
||||
|
||||
@ -50,6 +50,7 @@ class StableDiffusionMegaPipeline(DiffusionPipeline):
|
||||
feature_extractor ([`CLIPImageProcessor`]):
|
||||
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
|
||||
"""
|
||||
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
|
||||
def __init__(
|
||||
|
||||
@ -170,6 +170,7 @@ class StableDiffusionRepaintPipeline(DiffusionPipeline, TextualInversionLoaderMi
|
||||
feature_extractor ([`CLIPImageProcessor`]):
|
||||
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
|
||||
"""
|
||||
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
|
||||
def __init__(
|
||||
|
||||
@ -464,9 +464,7 @@ def main(args):
|
||||
unet = gemini_zero_dpp(unet, args.placement)
|
||||
|
||||
# config optimizer for colossalai zero
|
||||
optimizer = GeminiAdamOptimizer(
|
||||
unet, lr=args.learning_rate, initial_scale=2**5, clipping_norm=args.max_grad_norm
|
||||
)
|
||||
optimizer = GeminiAdamOptimizer(unet, lr=args.learning_rate, initial_scale=2**5, clipping_norm=args.max_grad_norm)
|
||||
|
||||
# load noise_scheduler
|
||||
noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
|
||||
|
||||
@ -1,10 +1,6 @@
|
||||
[tool.black]
|
||||
line-length = 119
|
||||
target-version = ['py37']
|
||||
|
||||
[tool.ruff]
|
||||
# Never enforce `E501` (line length violations).
|
||||
ignore = ["C901", "E501", "E741", "W605"]
|
||||
ignore = ["C901", "E501", "E741", "F402", "F823"]
|
||||
select = ["C", "E", "F", "I", "W"]
|
||||
line-length = 119
|
||||
|
||||
@ -16,3 +12,16 @@ line-length = 119
|
||||
[tool.ruff.isort]
|
||||
lines-after-imports = 2
|
||||
known-first-party = ["diffusers"]
|
||||
|
||||
[tool.ruff.format]
|
||||
# Like Black, use double quotes for strings.
|
||||
quote-style = "double"
|
||||
|
||||
# Like Black, indent with spaces, rather than tabs.
|
||||
indent-style = "space"
|
||||
|
||||
# Like Black, respect magic trailing commas.
|
||||
skip-magic-trailing-comma = false
|
||||
|
||||
# Like Black, automatically detect the appropriate line ending.
|
||||
line-ending = "auto"
|
||||
|
||||
@ -11,7 +11,7 @@ from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel
|
||||
from diffusers.schedulers.scheduling_unclip import UnCLIPScheduler
|
||||
|
||||
|
||||
"""
|
||||
r"""
|
||||
Example - From the diffusers root directory:
|
||||
|
||||
Download weights:
|
||||
|
||||
20
setup.cfg
20
setup.cfg
@ -1,20 +0,0 @@
|
||||
[isort]
|
||||
default_section = FIRSTPARTY
|
||||
ensure_newline_before_comments = True
|
||||
force_grid_wrap = 0
|
||||
include_trailing_comma = True
|
||||
known_first_party = accelerate
|
||||
known_third_party =
|
||||
numpy
|
||||
torch
|
||||
torch_xla
|
||||
|
||||
line_length = 119
|
||||
lines_after_imports = 2
|
||||
multi_line_output = 3
|
||||
use_parentheses = True
|
||||
|
||||
[flake8]
|
||||
ignore = E203, E722, E501, E741, W503, W605
|
||||
max-line-length = 119
|
||||
per-file-ignores = __init__.py:F401
|
||||
26
setup.py
26
setup.py
@ -44,9 +44,9 @@ To create the package for PyPI.
|
||||
For the sources, run: "python setup.py sdist"
|
||||
You should now have a /dist directory with both .whl and .tar.gz source versions.
|
||||
|
||||
Long story cut short, you need to run both before you can upload the distribution to the
|
||||
test PyPI and the actual PyPI servers:
|
||||
|
||||
Long story cut short, you need to run both before you can upload the distribution to the
|
||||
test PyPI and the actual PyPI servers:
|
||||
|
||||
python setup.py bdist_wheel && python setup.py sdist
|
||||
|
||||
8. Check that everything looks correct by uploading the package to the PyPI test server:
|
||||
@ -78,9 +78,9 @@ To create the package for PyPI.
|
||||
you need to go back to main before executing this.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from distutils.core import Command
|
||||
|
||||
from setuptools import find_packages, setup
|
||||
@ -93,7 +93,6 @@ _deps = [
|
||||
"Pillow", # keep the PIL.Image.Resampling deprecation away
|
||||
"accelerate>=0.11.0",
|
||||
"compel==0.1.8",
|
||||
"black~=23.1",
|
||||
"datasets",
|
||||
"filelock",
|
||||
"flax>=0.4.1",
|
||||
@ -119,7 +118,7 @@ _deps = [
|
||||
"pytest-timeout",
|
||||
"pytest-xdist",
|
||||
"python>=3.8.0",
|
||||
"ruff==0.0.280",
|
||||
"ruff>=0.1.5,<=0.2",
|
||||
"safetensors>=0.3.1",
|
||||
"sentencepiece>=0.1.91,!=0.1.92",
|
||||
"scipy",
|
||||
@ -171,7 +170,11 @@ class DepsTableUpdateCommand(Command):
|
||||
description = "build runtime dependency table"
|
||||
user_options = [
|
||||
# format: (long option, short option, description).
|
||||
("dep-table-update", None, "updates src/diffusers/dependency_versions_table.py"),
|
||||
(
|
||||
"dep-table-update",
|
||||
None,
|
||||
"updates src/diffusers/dependency_versions_table.py",
|
||||
),
|
||||
]
|
||||
|
||||
def initialize_options(self):
|
||||
@ -197,10 +200,8 @@ class DepsTableUpdateCommand(Command):
|
||||
f.write("\n".join(content))
|
||||
|
||||
|
||||
|
||||
|
||||
extras = {}
|
||||
extras["quality"] = deps_list("urllib3", "black", "isort", "ruff", "hf-doc-builder")
|
||||
extras["quality"] = deps_list("urllib3", "isort", "ruff", "hf-doc-builder")
|
||||
extras["docs"] = deps_list("hf-doc-builder")
|
||||
extras["training"] = deps_list("accelerate", "datasets", "protobuf", "tensorboard", "Jinja2")
|
||||
extras["test"] = deps_list(
|
||||
@ -275,10 +276,7 @@ setup(
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
"Programming Language :: Python :: 3",
|
||||
]
|
||||
+ [
|
||||
f"Programming Language :: Python :: 3.{i}"
|
||||
for i in range(8, version_range_max)
|
||||
],
|
||||
+ [f"Programming Language :: Python :: 3.{i}" for i in range(8, version_range_max)],
|
||||
cmdclass={"deps_table_update": DepsTableUpdateCommand},
|
||||
)
|
||||
|
||||
|
||||
@ -95,6 +95,7 @@ class ConfigMixin:
|
||||
should only have a `kwargs` argument if at least one argument is deprecated (should be overridden by
|
||||
subclass).
|
||||
"""
|
||||
|
||||
config_name = None
|
||||
ignore_for_config = []
|
||||
has_compatibles = False
|
||||
|
||||
@ -5,7 +5,6 @@ deps = {
|
||||
"Pillow": "Pillow",
|
||||
"accelerate": "accelerate>=0.11.0",
|
||||
"compel": "compel==0.1.8",
|
||||
"black": "black~=23.1",
|
||||
"datasets": "datasets",
|
||||
"filelock": "filelock",
|
||||
"flax": "flax>=0.4.1",
|
||||
@ -31,7 +30,7 @@ deps = {
|
||||
"pytest-timeout": "pytest-timeout",
|
||||
"pytest-xdist": "pytest-xdist",
|
||||
"python": "python>=3.8.0",
|
||||
"ruff": "ruff==0.0.280",
|
||||
"ruff": "ruff>=0.1.5,<=0.2",
|
||||
"safetensors": "safetensors>=0.3.1",
|
||||
"sentencepiece": "sentencepiece>=0.1.91,!=0.1.92",
|
||||
"scipy": "scipy",
|
||||
|
||||
@ -71,6 +71,7 @@ class LoraLoaderMixin:
|
||||
Load LoRA layers into [`UNet2DConditionModel`] and
|
||||
[`CLIPTextModel`](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel).
|
||||
"""
|
||||
|
||||
text_encoder_name = TEXT_ENCODER_NAME
|
||||
unet_name = UNET_NAME
|
||||
num_fused_loras = 0
|
||||
|
||||
@ -110,7 +110,10 @@ def jax_memory_efficient_attention(
|
||||
)
|
||||
|
||||
_, res = jax.lax.scan(
|
||||
f=chunk_scanner, init=0, xs=None, length=math.ceil(num_q / query_chunk_size) # start counter # stop counter
|
||||
f=chunk_scanner,
|
||||
init=0,
|
||||
xs=None,
|
||||
length=math.ceil(num_q / query_chunk_size), # start counter # stop counter
|
||||
)
|
||||
|
||||
return jnp.concatenate(res, axis=-3) # fuse the chunked result back
|
||||
@ -138,6 +141,7 @@ class FlaxAttention(nn.Module):
|
||||
Parameters `dtype`
|
||||
|
||||
"""
|
||||
|
||||
query_dim: int
|
||||
heads: int = 8
|
||||
dim_head: int = 64
|
||||
@ -262,6 +266,7 @@ class FlaxBasicTransformerBlock(nn.Module):
|
||||
Whether to split the head dimension into a new axis for the self-attention computation. In most cases,
|
||||
enabling this flag should speed up the computation for Stable Diffusion 2.x and Stable Diffusion XL.
|
||||
"""
|
||||
|
||||
dim: int
|
||||
n_heads: int
|
||||
d_head: int
|
||||
@ -347,6 +352,7 @@ class FlaxTransformer2DModel(nn.Module):
|
||||
Whether to split the head dimension into a new axis for the self-attention computation. In most cases,
|
||||
enabling this flag should speed up the computation for Stable Diffusion 2.x and Stable Diffusion XL.
|
||||
"""
|
||||
|
||||
in_channels: int
|
||||
n_heads: int
|
||||
d_head: int
|
||||
@ -442,6 +448,7 @@ class FlaxFeedForward(nn.Module):
|
||||
dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
|
||||
Parameters `dtype`
|
||||
"""
|
||||
|
||||
dim: int
|
||||
dropout: float = 0.0
|
||||
dtype: jnp.dtype = jnp.float32
|
||||
@ -471,6 +478,7 @@ class FlaxGEGLU(nn.Module):
|
||||
dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
|
||||
Parameters `dtype`
|
||||
"""
|
||||
|
||||
dim: int
|
||||
dropout: float = 0.0
|
||||
dtype: jnp.dtype = jnp.float32
|
||||
|
||||
@ -91,6 +91,7 @@ class AutoencoderTiny(ModelMixin, ConfigMixin):
|
||||
`force_upcast` can be set to `False` (see this fp16-friendly
|
||||
[AutoEncoder](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix)).
|
||||
"""
|
||||
|
||||
_supports_gradient_checkpointing = True
|
||||
|
||||
@register_to_config
|
||||
|
||||
@ -146,6 +146,7 @@ class FlaxControlNetModel(nn.Module, FlaxModelMixin, ConfigMixin):
|
||||
conditioning_embedding_out_channels (`tuple`, *optional*, defaults to `(16, 32, 96, 256)`):
|
||||
The tuple of output channel for each block in the `conditioning_embedding` layer.
|
||||
"""
|
||||
|
||||
sample_size: int = 32
|
||||
in_channels: int = 4
|
||||
down_block_types: Tuple[str, ...] = (
|
||||
|
||||
@ -65,6 +65,7 @@ class FlaxTimestepEmbedding(nn.Module):
|
||||
dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
|
||||
Parameters `dtype`
|
||||
"""
|
||||
|
||||
time_embed_dim: int = 32
|
||||
dtype: jnp.dtype = jnp.float32
|
||||
|
||||
@ -84,6 +85,7 @@ class FlaxTimesteps(nn.Module):
|
||||
dim (`int`, *optional*, defaults to `32`):
|
||||
Time step embedding dimension
|
||||
"""
|
||||
|
||||
dim: int = 32
|
||||
flip_sin_to_cos: bool = False
|
||||
freq_shift: float = 1
|
||||
|
||||
@ -52,6 +52,7 @@ class FlaxModelMixin(PushToHubMixin):
|
||||
|
||||
- **config_name** ([`str`]) -- Filename to save a model to when calling [`~FlaxModelMixin.save_pretrained`].
|
||||
"""
|
||||
|
||||
config_name = CONFIG_NAME
|
||||
_automatically_saved_args = ["_diffusers_version", "_class_name", "_name_or_path"]
|
||||
_flax_internal_args = ["name", "parent", "dtype"]
|
||||
|
||||
@ -193,6 +193,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
|
||||
- **config_name** ([`str`]) -- Filename to save a model to when calling [`~models.ModelMixin.save_pretrained`].
|
||||
"""
|
||||
|
||||
config_name = CONFIG_NAME
|
||||
_automatically_saved_args = ["_diffusers_version", "_class_name", "_name_or_path"]
|
||||
_supports_gradient_checkpointing = False
|
||||
|
||||
@ -45,6 +45,7 @@ class FlaxCrossAttnDownBlock2D(nn.Module):
|
||||
dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
|
||||
Parameters `dtype`
|
||||
"""
|
||||
|
||||
in_channels: int
|
||||
out_channels: int
|
||||
dropout: float = 0.0
|
||||
@ -125,6 +126,7 @@ class FlaxDownBlock2D(nn.Module):
|
||||
dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
|
||||
Parameters `dtype`
|
||||
"""
|
||||
|
||||
in_channels: int
|
||||
out_channels: int
|
||||
dropout: float = 0.0
|
||||
@ -190,6 +192,7 @@ class FlaxCrossAttnUpBlock2D(nn.Module):
|
||||
dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
|
||||
Parameters `dtype`
|
||||
"""
|
||||
|
||||
in_channels: int
|
||||
out_channels: int
|
||||
prev_output_channel: int
|
||||
@ -275,6 +278,7 @@ class FlaxUpBlock2D(nn.Module):
|
||||
dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
|
||||
Parameters `dtype`
|
||||
"""
|
||||
|
||||
in_channels: int
|
||||
out_channels: int
|
||||
prev_output_channel: int
|
||||
@ -339,6 +343,7 @@ class FlaxUNetMidBlock2DCrossAttn(nn.Module):
|
||||
dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
|
||||
Parameters `dtype`
|
||||
"""
|
||||
|
||||
in_channels: int
|
||||
dropout: float = 0.0
|
||||
num_layers: int = 1
|
||||
|
||||
@ -174,6 +174,7 @@ class UNetMotionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin):
|
||||
This model inherits from [`ModelMixin`]. Check the superclass documentation for its generic methods implemented
|
||||
for all models (such as downloading or saving).
|
||||
"""
|
||||
|
||||
_supports_gradient_checkpointing = True
|
||||
|
||||
@register_to_config
|
||||
|
||||
@ -214,6 +214,7 @@ class FlaxAttentionBlock(nn.Module):
|
||||
Parameters `dtype`
|
||||
|
||||
"""
|
||||
|
||||
channels: int
|
||||
num_head_channels: int = None
|
||||
num_groups: int = 32
|
||||
@ -291,6 +292,7 @@ class FlaxDownEncoderBlock2D(nn.Module):
|
||||
dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
|
||||
Parameters `dtype`
|
||||
"""
|
||||
|
||||
in_channels: int
|
||||
out_channels: int
|
||||
dropout: float = 0.0
|
||||
@ -347,6 +349,7 @@ class FlaxUpDecoderBlock2D(nn.Module):
|
||||
dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
|
||||
Parameters `dtype`
|
||||
"""
|
||||
|
||||
in_channels: int
|
||||
out_channels: int
|
||||
dropout: float = 0.0
|
||||
@ -401,6 +404,7 @@ class FlaxUNetMidBlock2D(nn.Module):
|
||||
dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
|
||||
Parameters `dtype`
|
||||
"""
|
||||
|
||||
in_channels: int
|
||||
dropout: float = 0.0
|
||||
num_layers: int = 1
|
||||
@ -488,6 +492,7 @@ class FlaxEncoder(nn.Module):
|
||||
dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
|
||||
Parameters `dtype`
|
||||
"""
|
||||
|
||||
in_channels: int = 3
|
||||
out_channels: int = 3
|
||||
down_block_types: Tuple[str] = ("DownEncoderBlock2D",)
|
||||
@ -600,6 +605,7 @@ class FlaxDecoder(nn.Module):
|
||||
dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
|
||||
parameters `dtype`
|
||||
"""
|
||||
|
||||
in_channels: int = 3
|
||||
out_channels: int = 3
|
||||
up_block_types: Tuple[str] = ("UpDecoderBlock2D",)
|
||||
@ -767,6 +773,7 @@ class FlaxAutoencoderKL(nn.Module, FlaxModelMixin, ConfigMixin):
|
||||
dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`):
|
||||
The `dtype` of the parameters.
|
||||
"""
|
||||
|
||||
in_channels: int = 3
|
||||
out_channels: int = 3
|
||||
down_block_types: Tuple[str] = ("DownEncoderBlock2D",)
|
||||
|
||||
@ -243,10 +243,7 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
|
||||
lora_scale: Optional[float] = None,
|
||||
**kwargs,
|
||||
):
|
||||
deprecation_message = (
|
||||
"`_encode_prompt()` is deprecated and it will be removed in a future version. Use `encode_prompt()`"
|
||||
" instead. Also, be aware that the output format changed from a concatenated tensor to a tuple."
|
||||
)
|
||||
deprecation_message = "`_encode_prompt()` is deprecated and it will be removed in a future version. Use `encode_prompt()` instead. Also, be aware that the output format changed from a concatenated tensor to a tuple."
|
||||
deprecate("_encode_prompt()", "1.0.0", deprecation_message, standard_warn=False)
|
||||
|
||||
prompt_embeds_tuple = self.encode_prompt(
|
||||
@ -462,10 +459,7 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
|
||||
return image, has_nsfw_concept
|
||||
|
||||
def decode_latents(self, latents):
|
||||
deprecation_message = (
|
||||
"The decode_latents method is deprecated and will be removed in 1.0.0. Please use"
|
||||
" VaeImageProcessor.postprocess(...) instead"
|
||||
)
|
||||
deprecation_message = "The decode_latents method is deprecated and will be removed in 1.0.0. Please use VaeImageProcessor.postprocess(...) instead"
|
||||
deprecate("decode_latents", "1.0.0", deprecation_message, standard_warn=False)
|
||||
|
||||
latents = 1 / self.vae.config.scaling_factor * latents
|
||||
@ -515,8 +509,7 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
|
||||
k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
|
||||
):
|
||||
raise ValueError(
|
||||
f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found"
|
||||
f" {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
|
||||
f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
|
||||
)
|
||||
|
||||
if prompt is not None and prompt_embeds is not None:
|
||||
@ -747,15 +740,13 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
|
||||
deprecate(
|
||||
"callback",
|
||||
"1.0.0",
|
||||
"Passing `callback` as an input argument to `__call__` is deprecated, consider using"
|
||||
" `callback_on_step_end`",
|
||||
"Passing `callback` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
|
||||
)
|
||||
if callback_steps is not None:
|
||||
deprecate(
|
||||
"callback_steps",
|
||||
"1.0.0",
|
||||
"Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using"
|
||||
" `callback_on_step_end`",
|
||||
"Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
|
||||
)
|
||||
|
||||
# 0. Default height and width to unet
|
||||
|
||||
@ -252,10 +252,7 @@ class AltDiffusionImg2ImgPipeline(
|
||||
lora_scale: Optional[float] = None,
|
||||
**kwargs,
|
||||
):
|
||||
deprecation_message = (
|
||||
"`_encode_prompt()` is deprecated and it will be removed in a future version. Use `encode_prompt()`"
|
||||
" instead. Also, be aware that the output format changed from a concatenated tensor to a tuple."
|
||||
)
|
||||
deprecation_message = "`_encode_prompt()` is deprecated and it will be removed in a future version. Use `encode_prompt()` instead. Also, be aware that the output format changed from a concatenated tensor to a tuple."
|
||||
deprecate("_encode_prompt()", "1.0.0", deprecation_message, standard_warn=False)
|
||||
|
||||
prompt_embeds_tuple = self.encode_prompt(
|
||||
@ -471,10 +468,7 @@ class AltDiffusionImg2ImgPipeline(
|
||||
return image, has_nsfw_concept
|
||||
|
||||
def decode_latents(self, latents):
|
||||
deprecation_message = (
|
||||
"The decode_latents method is deprecated and will be removed in 1.0.0. Please use"
|
||||
" VaeImageProcessor.postprocess(...) instead"
|
||||
)
|
||||
deprecation_message = "The decode_latents method is deprecated and will be removed in 1.0.0. Please use VaeImageProcessor.postprocess(...) instead"
|
||||
deprecate("decode_latents", "1.0.0", deprecation_message, standard_warn=False)
|
||||
|
||||
latents = 1 / self.vae.config.scaling_factor * latents
|
||||
@ -524,8 +518,7 @@ class AltDiffusionImg2ImgPipeline(
|
||||
k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
|
||||
):
|
||||
raise ValueError(
|
||||
f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found"
|
||||
f" {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
|
||||
f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
|
||||
)
|
||||
if prompt is not None and prompt_embeds is not None:
|
||||
raise ValueError(
|
||||
@ -578,8 +571,8 @@ class AltDiffusionImg2ImgPipeline(
|
||||
else:
|
||||
if isinstance(generator, list) and len(generator) != batch_size:
|
||||
raise ValueError(
|
||||
f"You have passed a list of generators of length {len(generator)}, but requested an effective"
|
||||
f" batch size of {batch_size}. Make sure the batch size matches the length of the generators."
|
||||
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
||||
f" size of {batch_size}. Make sure the batch size matches the length of the generators."
|
||||
)
|
||||
|
||||
elif isinstance(generator, list):
|
||||
@ -798,15 +791,13 @@ class AltDiffusionImg2ImgPipeline(
|
||||
deprecate(
|
||||
"callback",
|
||||
"1.0.0",
|
||||
"Passing `callback` as an input argument to `__call__` is deprecated, consider use"
|
||||
" `callback_on_step_end`",
|
||||
"Passing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
|
||||
)
|
||||
if callback_steps is not None:
|
||||
deprecate(
|
||||
"callback_steps",
|
||||
"1.0.0",
|
||||
"Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use"
|
||||
" `callback_on_step_end`",
|
||||
"Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
|
||||
)
|
||||
|
||||
# 1. Check inputs. Raise error if not correct
|
||||
|
||||
@ -99,6 +99,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLo
|
||||
A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
|
||||
[`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
|
||||
def __init__(
|
||||
|
||||
@ -72,6 +72,7 @@ class AudioLDMPipeline(DiffusionPipeline):
|
||||
vocoder ([`~transformers.SpeechT5HifiGan`]):
|
||||
Vocoder of class `SpeechT5HifiGan`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
|
||||
def __init__(
|
||||
|
||||
@ -181,6 +181,7 @@ class AutoPipelineForText2Image(ConfigMixin):
|
||||
diffusion pipeline's components.
|
||||
|
||||
"""
|
||||
|
||||
config_name = "model_index.json"
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@ -451,6 +452,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
|
||||
diffusion pipeline's components.
|
||||
|
||||
"""
|
||||
|
||||
config_name = "model_index.json"
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@ -726,6 +728,7 @@ class AutoPipelineForInpainting(ConfigMixin):
|
||||
diffusion pipeline's components.
|
||||
|
||||
"""
|
||||
|
||||
config_name = "model_index.json"
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
|
||||
@ -74,6 +74,7 @@ class ConsistencyModelPipeline(DiffusionPipeline):
|
||||
A scheduler to be used in combination with `unet` to denoise the encoded image latents. Currently only
|
||||
compatible with [`CMStochasticIterativeScheduler`].
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "unet"
|
||||
|
||||
def __init__(self, unet: UNet2DModel, scheduler: CMStochasticIterativeScheduler) -> None:
|
||||
|
||||
@ -126,6 +126,7 @@ class StableDiffusionControlNetPipeline(
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
@ -886,9 +887,10 @@ class StableDiffusionControlNetPipeline(
|
||||
control_guidance_end = len(control_guidance_start) * [control_guidance_end]
|
||||
elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
|
||||
mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
|
||||
control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
|
||||
control_guidance_end
|
||||
]
|
||||
control_guidance_start, control_guidance_end = (
|
||||
mult * [control_guidance_start],
|
||||
mult * [control_guidance_end],
|
||||
)
|
||||
|
||||
# 1. Check inputs. Raise error if not correct
|
||||
self.check_inputs(
|
||||
|
||||
@ -160,6 +160,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
@ -933,9 +934,10 @@ class StableDiffusionControlNetImg2ImgPipeline(
|
||||
control_guidance_end = len(control_guidance_start) * [control_guidance_end]
|
||||
elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
|
||||
mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
|
||||
control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
|
||||
control_guidance_end
|
||||
]
|
||||
control_guidance_start, control_guidance_end = (
|
||||
mult * [control_guidance_start],
|
||||
mult * [control_guidance_end],
|
||||
)
|
||||
|
||||
# 1. Check inputs. Raise error if not correct
|
||||
self.check_inputs(
|
||||
|
||||
@ -282,6 +282,7 @@ class StableDiffusionControlNetInpaintPipeline(
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
@ -1142,9 +1143,10 @@ class StableDiffusionControlNetInpaintPipeline(
|
||||
control_guidance_end = len(control_guidance_start) * [control_guidance_end]
|
||||
elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
|
||||
mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
|
||||
control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
|
||||
control_guidance_end
|
||||
]
|
||||
control_guidance_start, control_guidance_end = (
|
||||
mult * [control_guidance_start],
|
||||
mult * [control_guidance_end],
|
||||
)
|
||||
|
||||
# 1. Check inputs. Raise error if not correct
|
||||
self.check_inputs(
|
||||
|
||||
@ -164,6 +164,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
|
||||
A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
|
||||
[`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
|
||||
_optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
|
||||
|
||||
@ -1198,9 +1199,10 @@ class StableDiffusionXLControlNetInpaintPipeline(
|
||||
control_guidance_end = len(control_guidance_start) * [control_guidance_end]
|
||||
elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
|
||||
mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
|
||||
control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
|
||||
control_guidance_end
|
||||
]
|
||||
control_guidance_start, control_guidance_end = (
|
||||
mult * [control_guidance_start],
|
||||
mult * [control_guidance_end],
|
||||
)
|
||||
|
||||
# # 0.0 Default height and width to unet
|
||||
# height = height or self.unet.config.sample_size * self.vae_scale_factor
|
||||
@ -1213,9 +1215,10 @@ class StableDiffusionXLControlNetInpaintPipeline(
|
||||
control_guidance_end = len(control_guidance_start) * [control_guidance_end]
|
||||
elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
|
||||
mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
|
||||
control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
|
||||
control_guidance_end
|
||||
]
|
||||
control_guidance_start, control_guidance_end = (
|
||||
mult * [control_guidance_start],
|
||||
mult * [control_guidance_end],
|
||||
)
|
||||
|
||||
# 1. Check inputs
|
||||
self.check_inputs(
|
||||
|
||||
@ -139,6 +139,7 @@ class StableDiffusionXLControlNetPipeline(
|
||||
watermark output images. If not defined, it defaults to `True` if the package is installed; otherwise no
|
||||
watermarker is used.
|
||||
"""
|
||||
|
||||
# leave controlnet out on purpose because it iterates with unet
|
||||
model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
|
||||
_optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
|
||||
@ -1005,9 +1006,10 @@ class StableDiffusionXLControlNetPipeline(
|
||||
control_guidance_end = len(control_guidance_start) * [control_guidance_end]
|
||||
elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
|
||||
mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
|
||||
control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
|
||||
control_guidance_end
|
||||
]
|
||||
control_guidance_start, control_guidance_end = (
|
||||
mult * [control_guidance_start],
|
||||
mult * [control_guidance_end],
|
||||
)
|
||||
|
||||
# 1. Check inputs. Raise error if not correct
|
||||
self.check_inputs(
|
||||
|
||||
@ -192,6 +192,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
|
||||
watermark output images. If not defined, it will default to True if the package is installed, otherwise no
|
||||
watermarker will be used.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
|
||||
_optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
|
||||
|
||||
@ -1154,9 +1155,10 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
|
||||
control_guidance_end = len(control_guidance_start) * [control_guidance_end]
|
||||
elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
|
||||
mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
|
||||
control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
|
||||
control_guidance_end
|
||||
]
|
||||
control_guidance_start, control_guidance_end = (
|
||||
mult * [control_guidance_start],
|
||||
mult * [control_guidance_end],
|
||||
)
|
||||
|
||||
# 1. Check inputs. Raise error if not correct
|
||||
self.check_inputs(
|
||||
|
||||
@ -39,6 +39,7 @@ class DanceDiffusionPipeline(DiffusionPipeline):
|
||||
A scheduler to be used in combination with `unet` to denoise the encoded audio latents. Can be one of
|
||||
[`IPNDMScheduler`].
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "unet"
|
||||
|
||||
def __init__(self, unet, scheduler):
|
||||
|
||||
@ -35,6 +35,7 @@ class DDIMPipeline(DiffusionPipeline):
|
||||
A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of
|
||||
[`DDPMScheduler`], or [`DDIMScheduler`].
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "unet"
|
||||
|
||||
def __init__(self, unet, scheduler):
|
||||
|
||||
@ -35,6 +35,7 @@ class DDPMPipeline(DiffusionPipeline):
|
||||
A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of
|
||||
[`DDPMScheduler`], or [`DDIMScheduler`].
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "unet"
|
||||
|
||||
def __init__(self, unet, scheduler):
|
||||
|
||||
@ -98,7 +98,19 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
|
||||
watermarker: Optional[IFWatermarker]
|
||||
|
||||
bad_punct_regex = re.compile(
|
||||
r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
|
||||
r"["
|
||||
+ "#®•©™&@·º½¾¿¡§~"
|
||||
+ r"\)"
|
||||
+ r"\("
|
||||
+ r"\]"
|
||||
+ r"\["
|
||||
+ r"\}"
|
||||
+ r"\{"
|
||||
+ r"\|"
|
||||
+ "\\"
|
||||
+ r"\/"
|
||||
+ r"\*"
|
||||
+ r"]{1,}"
|
||||
) # noqa
|
||||
|
||||
_optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
|
||||
|
||||
@ -122,7 +122,19 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
|
||||
watermarker: Optional[IFWatermarker]
|
||||
|
||||
bad_punct_regex = re.compile(
|
||||
r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
|
||||
r"["
|
||||
+ "#®•©™&@·º½¾¿¡§~"
|
||||
+ r"\)"
|
||||
+ r"\("
|
||||
+ r"\]"
|
||||
+ r"\["
|
||||
+ r"\}"
|
||||
+ r"\{"
|
||||
+ r"\|"
|
||||
+ "\\"
|
||||
+ r"\/"
|
||||
+ r"\*"
|
||||
+ r"]{1,}"
|
||||
) # noqa
|
||||
|
||||
_optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
|
||||
|
||||
@ -126,7 +126,19 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
||||
watermarker: Optional[IFWatermarker]
|
||||
|
||||
bad_punct_regex = re.compile(
|
||||
r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
|
||||
r"["
|
||||
+ "#®•©™&@·º½¾¿¡§~"
|
||||
+ r"\)"
|
||||
+ r"\("
|
||||
+ r"\]"
|
||||
+ r"\["
|
||||
+ r"\}"
|
||||
+ r"\{"
|
||||
+ r"\|"
|
||||
+ "\\"
|
||||
+ r"\/"
|
||||
+ r"\*"
|
||||
+ r"]{1,}"
|
||||
) # noqa
|
||||
|
||||
_optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor"]
|
||||
|
||||
@ -125,7 +125,19 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
||||
watermarker: Optional[IFWatermarker]
|
||||
|
||||
bad_punct_regex = re.compile(
|
||||
r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
|
||||
r"["
|
||||
+ "#®•©™&@·º½¾¿¡§~"
|
||||
+ r"\)"
|
||||
+ r"\("
|
||||
+ r"\]"
|
||||
+ r"\["
|
||||
+ r"\}"
|
||||
+ r"\{"
|
||||
+ r"\|"
|
||||
+ "\\"
|
||||
+ r"\/"
|
||||
+ r"\*"
|
||||
+ r"]{1,}"
|
||||
) # noqa
|
||||
|
||||
_optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
|
||||
|
||||
@ -128,7 +128,19 @@ class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
||||
watermarker: Optional[IFWatermarker]
|
||||
|
||||
bad_punct_regex = re.compile(
|
||||
r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
|
||||
r"["
|
||||
+ "#®•©™&@·º½¾¿¡§~"
|
||||
+ r"\)"
|
||||
+ r"\("
|
||||
+ r"\]"
|
||||
+ r"\["
|
||||
+ r"\}"
|
||||
+ r"\{"
|
||||
+ r"\|"
|
||||
+ "\\"
|
||||
+ r"\/"
|
||||
+ r"\*"
|
||||
+ r"]{1,}"
|
||||
) # noqa
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet"
|
||||
|
||||
@ -84,7 +84,19 @@ class IFSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
||||
watermarker: Optional[IFWatermarker]
|
||||
|
||||
bad_punct_regex = re.compile(
|
||||
r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
|
||||
r"["
|
||||
+ "#®•©™&@·º½¾¿¡§~"
|
||||
+ r"\)"
|
||||
+ r"\("
|
||||
+ r"\]"
|
||||
+ r"\["
|
||||
+ r"\}"
|
||||
+ r"\{"
|
||||
+ r"\|"
|
||||
+ "\\"
|
||||
+ r"\/"
|
||||
+ r"\*"
|
||||
+ r"]{1,}"
|
||||
) # noqa
|
||||
|
||||
_optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
|
||||
|
||||
@ -43,6 +43,7 @@ class DiTPipeline(DiffusionPipeline):
|
||||
scheduler ([`DDIMScheduler`]):
|
||||
A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "transformer->vae"
|
||||
|
||||
def __init__(
|
||||
|
||||
@ -115,6 +115,7 @@ class LatentConsistencyModelImg2ImgPipeline(
|
||||
requires_safety_checker (`bool`, *optional*, defaults to `True`):
|
||||
Whether the pipeline requires a safety checker component.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -97,6 +97,7 @@ class LatentConsistencyModelPipeline(
|
||||
requires_safety_checker (`bool`, *optional*, defaults to `True`):
|
||||
Whether the pipeline requires a safety checker component.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -49,6 +49,7 @@ class LDMTextToImagePipeline(DiffusionPipeline):
|
||||
A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
|
||||
[`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "bert->unet->vqvae"
|
||||
|
||||
def __init__(
|
||||
|
||||
@ -177,6 +177,7 @@ class PaintByExamplePipeline(DiffusionPipeline):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
|
||||
"""
|
||||
|
||||
# TODO: feature_extractor is required to encode initial images (if they are in PIL format),
|
||||
# we should give a descriptive message if the pipeline doesn't have one.
|
||||
|
||||
|
||||
@ -112,6 +112,7 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
- **config_name** ([`str`]) -- The configuration filename that stores the class and module names of all the
|
||||
diffusion pipeline's components.
|
||||
"""
|
||||
|
||||
config_name = "model_index.json"
|
||||
|
||||
def register_modules(self, **kwargs):
|
||||
|
||||
@ -542,6 +542,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
- **_optional_components** (`List[str]`) -- List of all optional components that don't have to be passed to the
|
||||
pipeline to function (should be overridden by subclasses).
|
||||
"""
|
||||
|
||||
config_name = "model_index.json"
|
||||
model_cpu_offload_seq = None
|
||||
_optional_components = []
|
||||
|
||||
@ -120,8 +120,21 @@ class PixArtAlphaPipeline(DiffusionPipeline):
|
||||
scheduler ([`SchedulerMixin`]):
|
||||
A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
|
||||
"""
|
||||
|
||||
bad_punct_regex = re.compile(
|
||||
r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
|
||||
r"["
|
||||
+ "#®•©™&@·º½¾¿¡§~"
|
||||
+ r"\)"
|
||||
+ r"\("
|
||||
+ r"\]"
|
||||
+ r"\["
|
||||
+ r"\}"
|
||||
+ r"\{"
|
||||
+ r"\|"
|
||||
+ "\\"
|
||||
+ r"\/"
|
||||
+ r"\*"
|
||||
+ r"]{1,}"
|
||||
) # noqa
|
||||
|
||||
_optional_components = ["tokenizer", "text_encoder"]
|
||||
|
||||
@ -35,6 +35,7 @@ class ScoreSdeVePipeline(DiffusionPipeline):
|
||||
scheduler ([`ScoreSdeVeScheduler`]):
|
||||
A `ScoreSdeVeScheduler` to be used in combination with `unet` to denoise the encoded image.
|
||||
"""
|
||||
|
||||
unet: UNet2DModel
|
||||
scheduler: ScoreSdeVeScheduler
|
||||
|
||||
|
||||
@ -54,6 +54,7 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline):
|
||||
A scheduler to be used in combination with `decoder` to denoise the encoded audio latents.
|
||||
melgan ([`OnnxRuntimeModel`]):
|
||||
"""
|
||||
|
||||
_optional_components = ["melgan"]
|
||||
|
||||
def __init__(
|
||||
|
||||
@ -148,6 +148,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
|
||||
|
||||
@ -33,10 +33,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess with 8->64
|
||||
def preprocess(image):
|
||||
deprecation_message = (
|
||||
"The preprocess method is deprecated and will be removed in diffusers 1.0.0. Please use"
|
||||
" VaeImageProcessor.preprocess(...) instead"
|
||||
)
|
||||
deprecation_message = "The preprocess method is deprecated and will be removed in diffusers 1.0.0. Please use VaeImageProcessor.preprocess(...) instead"
|
||||
deprecate("preprocess", "1.0.0", deprecation_message, standard_warn=False)
|
||||
if isinstance(image, torch.Tensor):
|
||||
return image
|
||||
@ -85,6 +82,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
|
||||
feature_extractor ([`CLIPImageProcessor`]):
|
||||
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
|
||||
"""
|
||||
|
||||
vae_encoder: OnnxRuntimeModel
|
||||
vae_decoder: OnnxRuntimeModel
|
||||
text_encoder: OnnxRuntimeModel
|
||||
|
||||
@ -80,6 +80,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
|
||||
feature_extractor ([`CLIPImageProcessor`]):
|
||||
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
|
||||
"""
|
||||
|
||||
vae_encoder: OnnxRuntimeModel
|
||||
vae_decoder: OnnxRuntimeModel
|
||||
text_encoder: OnnxRuntimeModel
|
||||
|
||||
@ -66,6 +66,7 @@ class OnnxStableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
|
||||
feature_extractor ([`CLIPImageProcessor`]):
|
||||
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
|
||||
"""
|
||||
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_is_onnx = True
|
||||
|
||||
|
||||
@ -102,6 +102,7 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -196,6 +196,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, TextualInversion
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -95,6 +95,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
||||
A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
|
||||
[`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds", "depth_mask"]
|
||||
|
||||
|
||||
@ -273,6 +273,7 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor", "inverse_scheduler"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -125,6 +125,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline):
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -177,6 +177,7 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline):
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -62,6 +62,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
# TODO: feature_extractor is required to encode images (if they are in PIL format),
|
||||
# we should give a descriptive message if the pipeline doesn't have one.
|
||||
_optional_components = ["safety_checker"]
|
||||
|
||||
@ -139,6 +139,7 @@ class StableDiffusionImg2ImgPipeline(
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -202,6 +202,7 @@ class StableDiffusionInpaintPipeline(
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -115,6 +115,7 @@ class StableDiffusionInpaintPipelineLegacy(
|
||||
feature_extractor ([`CLIPImageProcessor`]):
|
||||
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -89,6 +89,7 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -80,6 +80,7 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoade
|
||||
feature_extractor ([`CLIPImageProcessor`]):
|
||||
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -79,6 +79,7 @@ class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, FromSingleFileMixi
|
||||
scheduler ([`SchedulerMixin`]):
|
||||
A [`EulerDiscreteScheduler`] to be used in combination with `unet` to denoise the encoded image latents.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
|
||||
def __init__(
|
||||
|
||||
@ -115,6 +115,7 @@ class StableDiffusionLDM3DPipeline(
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -66,6 +66,7 @@ class StableDiffusionModelEditingPipeline(DiffusionPipeline, TextualInversionLoa
|
||||
with_augs ([`list`]):
|
||||
Textual augmentations to apply while editing the text-to-image model. Set to `[]` for no augmentations.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -85,6 +85,7 @@ class StableDiffusionPanoramaPipeline(DiffusionPipeline, TextualInversionLoaderM
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -96,6 +96,7 @@ class StableDiffusionParadigmsPipeline(
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -310,6 +310,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
|
||||
Whether the pipeline requires a safety checker. We recommend setting it to True if you're using the
|
||||
pipeline publicly.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = [
|
||||
"safety_checker",
|
||||
|
||||
@ -124,6 +124,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, TextualInversionLoaderMixin)
|
||||
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
||||
A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -92,6 +92,7 @@ class StableDiffusionUpscalePipeline(
|
||||
A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
|
||||
[`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["watermarker", "safety_checker", "feature_extractor"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
|
||||
@ -140,6 +140,7 @@ class StableDiffusionXLPipeline(
|
||||
watermark output images. If not defined, it will default to True if the package is installed, otherwise no
|
||||
watermarker will be used.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
|
||||
_optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
|
||||
_callback_tensor_inputs = [
|
||||
|
||||
@ -153,6 +153,7 @@ class StableDiffusionXLImg2ImgPipeline(
|
||||
watermark output images. If not defined, it will default to True if the package is installed, otherwise no
|
||||
watermarker will be used.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
|
||||
_optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
|
||||
_callback_tensor_inputs = [
|
||||
|
||||
@ -298,6 +298,7 @@ class StableDiffusionXLInpaintPipeline(
|
||||
watermark output images. If not defined, it will default to True if the package is installed, otherwise no
|
||||
watermarker will be used.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
|
||||
|
||||
_optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
|
||||
|
||||
@ -151,6 +151,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
|
||||
watermark output images. If not defined, it will default to True if the package is installed, otherwise no
|
||||
watermarker will be used.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
|
||||
_optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
|
||||
|
||||
|
||||
@ -152,6 +152,7 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline):
|
||||
feature_extractor ([`CLIPFeatureExtractor`]):
|
||||
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->adapter->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor"]
|
||||
|
||||
|
||||
@ -159,6 +159,7 @@ class StableDiffusionXLAdapterPipeline(
|
||||
feature_extractor ([`CLIPFeatureExtractor`]):
|
||||
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
|
||||
_optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
|
||||
|
||||
|
||||
@ -96,6 +96,7 @@ class TextToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lora
|
||||
A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
|
||||
[`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
|
||||
def __init__(
|
||||
|
||||
@ -158,6 +158,7 @@ class VideoToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
|
||||
A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
|
||||
[`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
|
||||
def __init__(
|
||||
|
||||
@ -183,6 +183,7 @@ class TextToVideoPipelineOutput(BaseOutput):
|
||||
List indicating whether the corresponding generated image contains "not-safe-for-work" (nsfw) content or
|
||||
`None` if safety checking could not be performed.
|
||||
"""
|
||||
|
||||
images: Union[List[PIL.Image.Image], np.ndarray]
|
||||
nsfw_content_detected: Optional[List[bool]]
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user