Compare commits

..

28 Commits

Author SHA1 Message Date
Patrick von Platen f5942649f5 Release: v.0.21.4-patch 2023-09-29 15:39:55 +00:00
Patrick von Platen edea57749e [Lora] fix lora fuse unfuse (#5003)
* fix lora fuse unfuse

* add same changes to loaders.py

* add test

---------

Co-authored-by: multimodalart <joaopaulo.passos+multimodal@gmail.com>
2023-09-29 15:30:16 +00:00
Sayak Paul c37c840b1b update 2023-09-27 22:13:19 +05:30
Sayak Paul 9858053bfe Release: v0.21.3 2023-09-27 22:10:50 +05:30
Patrick von Platen 6a3301fe34 resolve conflicts. 2023-09-27 22:09:04 +05:30
Patrick von Platen 813a1b2ee0 Fix one more 2023-09-19 00:09:30 +02:00
Patrick von Platen a43b8574a9 Patch release: v0.21.2 2023-09-19 00:06:16 +02:00
Sayak Paul a2f0db52e3 [LoRA] don't break offloading for incompatible lora ckpts. (#5085)
* don't break offloading for incompatible lora ckpts.

* debugging

* better condition.

* fix

* fix

* fix

* fix

---------

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
2023-09-19 00:05:34 +02:00
Will Berman 92f6693b37 remove unused adapter weights in constructor (#5088)
remove adapter weights in MultiAdapter constructor
2023-09-19 00:05:28 +02:00
Will Berman 932897afa8 t2i Adapter community member fix (#5090)
* convert tensorrt controlnet

* Fix code quality

* Fix code quality

* Fix code quality

* Fix code quality

* Fix code quality

* Fix code quality

* Fix number controlnet condition

* Add convert SD XL to onnx

* Add convert SD XL to tensorrt

* Add convert SD XL to tensorrt

* Add examples in comments

* Add examples in comments

* Add test onnx controlnet

* Add tensorrt test

* Remove copied

* Move file test to examples/community

* Remove script

* Remove script

* Remove text

* Fix import

* Fix T2I MultiAdapter

* fix tests

---------

Co-authored-by: dotieuthien <thien.do@mservice.com.vn>
Co-authored-by: dotieuthien <dotieuthien9997@gmail.com>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
Co-authored-by: dotieuthien <hades@cinnamon.is>
2023-09-19 00:05:20 +02:00
Patrick von Platen c2940434d0 [Textual inversion] Refactor textual inversion to make it cleaner (#5076)
* [Textual inversion] Clean loading

* [Textual inversion] Clean loading

* [Textual inversion] Clean up

* [Textual inversion] Clean up

* [Textual inversion] Clean up

* [Textual inversion] Clean up
2023-09-19 00:04:53 +02:00
Patrick von Platen 60ab8fad16 Patch release: v0.21.1 2023-09-14 13:06:57 +02:00
Patrick von Platen d17240457f [Import] Add missing settings / Correct some dummy imports (#5036)
* [Import] Add missing settings

* up

* up

* up
2023-09-14 12:47:55 +02:00
Vladimir Mandic 7512fc4df5 allow loading of sd models from safetensors without online lookups using local config files (#5019)
finish config_files implementation
2023-09-14 12:47:41 +02:00
Patrick von Platen 0c2f1ccc97 [Import] Don't force transformers to be installed (#5035)
* [Import] Don't force transformers to be installed

* make style
2023-09-14 12:47:34 +02:00
Dhruv Nair 47f2d2c7be Fix model offload bug when key isn't present (#5030)
* fix model offload bug when key isn't present

* make style
2023-09-14 12:47:25 +02:00
Patrick von Platen af85591593 Patch release: v0.21.1 2023-09-14 12:46:39 +02:00
Patrick von Platen 29f15673ed Release: v0.21.0 2023-09-13 15:58:24 +02:00
Patrick von Platen 1037287e2b examples fix t2i training (#5001)
* examples fix t2i training

* make style
2023-09-12 23:52:41 +02:00
Steven Liu 6ea95b7a90 Fix PR template (#4984)
fix template
2023-09-12 19:36:38 +02:00
Patrick von Platen 0e0db625d0 Fix safety checker seq offload (#4998)
* fix safety checker

* fix safety checker

* fix safety checker
2023-09-12 18:56:35 +02:00
dg845 1f948109b8 [docs] Fix DiffusionPipeline.enable_sequential_cpu_offload docstring (#4952)
* Fix an unmatched backtick and make description more general for DiffusionPipeline.enable_sequential_cpu_offload.

* make style

* _exclude_from_cpu_offload -> self._exclude_from_cpu_offload

* make style

* apply suggestions from review

* make style
2023-09-12 08:58:47 -07:00
Patrick von Platen 37cb819df5 [Lora] Speed up lora loading (#4994)
* speed up lora loading

* Apply suggestions from code review

* up

* up

* Fix more

* Correct more

* Apply suggestions from code review

* up

* Fix more

* Fix more -

* up

* up
2023-09-12 17:51:15 +02:00
Dhruv Nair f64d52dbca fix custom diffusion tests (#4996) 2023-09-12 17:50:47 +02:00
Dhruv Nair 4d897aaff5 fix image variation slow test (#4995)
fix image variation tests

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
2023-09-12 17:45:47 +02:00
Patrick von Platen b1105269b7 make style 2023-09-12 14:55:27 +00:00
Kashif Rasul 5d28d2217f [Wuerstchen] fix combined pipeline's num_images_per_prompt (#4989)
* fix encode_prompt

* added prompt_embeds and negative_prompt_embeds

* prompt_embeds for the prior only
2023-09-12 16:55:13 +02:00
Kashif Rasul 73bf620dec fix E721 Do not compare types, use isinstance() (#4992) 2023-09-12 16:52:25 +02:00
84 changed files with 917 additions and 480 deletions
+1 -1
View File
@@ -41,7 +41,7 @@ Core library:
- Schedulers: @williamberman and @patrickvonplaten
- Pipelines: @patrickvonplaten and @sayakpaul
- Training examples: @sayakpaul and @patrickvonplaten
- Docs: @stevenliu and @yiyixu
- Docs: @stevhliu and @yiyixuxu
- JAX and MPS: @pcuenca
- Audio: @sanchit-gandhi
- General functionalities: @patrickvonplaten and @sayakpaul
@@ -1138,7 +1138,7 @@ class SDXLLongPromptWeightingPipeline(DiffusionPipeline, FromSingleFileMixin, Lo
num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
# 7.1 Apply denoising_end
if denoising_end is not None and type(denoising_end) == float and denoising_end > 0 and denoising_end < 1:
if denoising_end is not None and isinstance(denoising_end, float) and denoising_end > 0 and denoising_end < 1:
discrete_timestep_cutoff = int(
round(
self.scheduler.config.num_train_timesteps
@@ -701,7 +701,7 @@ class StableDiffusionXLReferencePipeline(StableDiffusionXLPipeline):
num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
# 10.1 Apply denoising_end
if denoising_end is not None and type(denoising_end) == float and denoising_end > 0 and denoising_end < 1:
if denoising_end is not None and isinstance(denoising_end, float) and denoising_end > 0 and denoising_end < 1:
discrete_timestep_cutoff = int(
round(
self.scheduler.config.num_train_timesteps
+1 -1
View File
@@ -56,7 +56,7 @@ if is_wandb_available():
import wandb
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__)
+1 -1
View File
@@ -59,7 +59,7 @@ if is_wandb_available():
import wandb
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = logging.getLogger(__name__)
+1 -1
View File
@@ -58,7 +58,7 @@ if is_wandb_available():
import wandb
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__)
@@ -58,7 +58,7 @@ from diffusers.utils.import_utils import is_xformers_available
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__)
+1 -1
View File
@@ -60,7 +60,7 @@ if is_wandb_available():
import wandb
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__)
+1 -1
View File
@@ -36,7 +36,7 @@ from diffusers.utils import check_min_version
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
# Cache compiled models across invocations of this script.
cc.initialize_cache(os.path.expanduser("~/.cache/jax/compilation_cache"))
+1 -1
View File
@@ -70,7 +70,7 @@ from diffusers.utils.import_utils import is_xformers_available
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__)
@@ -58,7 +58,7 @@ from diffusers.utils.import_utils import is_xformers_available
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__)
@@ -52,7 +52,7 @@ from diffusers.utils.import_utils import is_xformers_available
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__, log_level="INFO")
@@ -55,7 +55,7 @@ from diffusers.utils.import_utils import is_xformers_available
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__, log_level="INFO")
@@ -58,7 +58,7 @@ if is_wandb_available():
import wandb
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__)
@@ -1060,7 +1060,9 @@ def main(args):
)
# Prepare everything with our `accelerator`.
t2iadapter, optimizer, lr_scheduler = accelerator.prepare(t2iadapter, optimizer, lr_scheduler)
t2iadapter, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(
t2iadapter, optimizer, train_dataloader, lr_scheduler
)
# We need to recalculate our total training steps as the size of the training dataloader may have changed.
num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
@@ -53,7 +53,7 @@ if is_wandb_available():
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__, log_level="INFO")
@@ -33,7 +33,7 @@ from diffusers.utils import check_min_version
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = logging.getLogger(__name__)
@@ -48,7 +48,7 @@ from diffusers.utils.import_utils import is_xformers_available
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__, log_level="INFO")
@@ -57,7 +57,7 @@ from diffusers.utils.import_utils import is_xformers_available
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__)
@@ -57,7 +57,7 @@ from diffusers.utils.import_utils import is_xformers_available
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__)
@@ -79,7 +79,7 @@ else:
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__)
@@ -56,7 +56,7 @@ else:
# ------------------------------------------------------------------------------
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = logging.getLogger(__name__)
@@ -30,7 +30,7 @@ from diffusers.utils.import_utils import is_xformers_available
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
check_min_version("0.21.0.dev0")
check_min_version("0.21.0")
logger = get_logger(__name__, log_level="INFO")
@@ -154,6 +154,7 @@ if __name__ == "__main__":
pipe = download_from_original_stable_diffusion_ckpt(
checkpoint_path_or_dict=args.checkpoint_path,
original_config_file=args.original_config_file,
config_files=args.config_files,
image_size=args.image_size,
prediction_type=args.prediction_type,
model_type=args.pipeline_type,
+1 -1
View File
@@ -244,7 +244,7 @@ install_requires = [
setup(
name="diffusers",
version="0.21.0.dev0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
version="0.21.4", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
description="State-of-the-art diffusion in PyTorch and JAX.",
long_description=open("README.md", "r", encoding="utf-8").read(),
long_description_content_type="text/markdown",
+1 -1
View File
@@ -1,4 +1,4 @@
__version__ = "0.21.0.dev0"
__version__ = "0.21.4"
from typing import TYPE_CHECKING
@@ -76,7 +76,7 @@ class ValueGuidedRLPipeline(DiffusionPipeline):
return x_in * self.stds[key] + self.means[key]
def to_torch(self, x_in):
if type(x_in) is dict:
if isinstance(x_in, dict):
return {k: self.to_torch(v) for k, v in x_in.items()}
elif torch.is_tensor(x_in):
return x_in.to(self.unet.device)
+549 -282
View File
File diff suppressed because it is too large Load Diff
+2
View File
@@ -90,6 +90,8 @@ class MultiAdapter(ModelMixin):
features = adapter(x)
if accume_state is None:
accume_state = features
for i in range(len(accume_state)):
accume_state[i] = w * accume_state[i]
else:
for i in range(len(features)):
accume_state[i] += w * features[i]
+4 -7
View File
@@ -304,19 +304,16 @@ class Attention(nn.Module):
self.set_processor(processor)
def set_processor(self, processor: "AttnProcessor"):
if (
hasattr(self, "processor")
and not isinstance(processor, LORA_ATTENTION_PROCESSORS)
and self.to_q.lora_layer is not None
):
def set_processor(self, processor: "AttnProcessor", _remove_lora=False):
if hasattr(self, "processor") and _remove_lora and self.to_q.lora_layer is not None:
deprecate(
"set_processor to offload LoRA",
"0.26.0",
"In detail, removing LoRA layers via calling `set_processor` or `set_default_attn_processor` is deprecated. Please make sure to call `pipe.unload_lora_weights()` instead.",
"In detail, removing LoRA layers via calling `set_default_attn_processor` is deprecated. Please make sure to call `pipe.unload_lora_weights()` instead.",
)
# TODO(Patrick, Sayak) - this can be deprecated once PEFT LoRA integration is complete
# We need to remove all LoRA layers
# Don't forget to remove ALL `_remove_lora` from the codebase
for module in self.modules():
if hasattr(module, "set_lora_layer"):
module.set_lora_layer(None)
+6 -4
View File
@@ -196,7 +196,9 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalVAEMixin):
return processors
# Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.set_attn_processor
def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
def set_attn_processor(
self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]], _remove_lora=False
):
r"""
Sets the attention processor to use to compute attention.
@@ -220,9 +222,9 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalVAEMixin):
def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
if hasattr(module, "set_processor"):
if not isinstance(processor, dict):
module.set_processor(processor)
module.set_processor(processor, _remove_lora=_remove_lora)
else:
module.set_processor(processor.pop(f"{name}.processor"))
module.set_processor(processor.pop(f"{name}.processor"), _remove_lora=_remove_lora)
for sub_name, child in module.named_children():
fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
@@ -244,7 +246,7 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalVAEMixin):
f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}"
)
self.set_attn_processor(processor)
self.set_attn_processor(processor, _remove_lora=True)
@apply_forward_hook
def encode(self, x: torch.FloatTensor, return_dict: bool = True) -> AutoencoderKLOutput:
+6 -4
View File
@@ -517,7 +517,9 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin):
return processors
# Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.set_attn_processor
def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
def set_attn_processor(
self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]], _remove_lora=False
):
r"""
Sets the attention processor to use to compute attention.
@@ -541,9 +543,9 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin):
def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
if hasattr(module, "set_processor"):
if not isinstance(processor, dict):
module.set_processor(processor)
module.set_processor(processor, _remove_lora=_remove_lora)
else:
module.set_processor(processor.pop(f"{name}.processor"))
module.set_processor(processor.pop(f"{name}.processor"), _remove_lora=_remove_lora)
for sub_name, child in module.named_children():
fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
@@ -565,7 +567,7 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin):
f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}"
)
self.set_attn_processor(processor)
self.set_attn_processor(processor, _remove_lora=True)
# Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.set_attention_slice
def set_attention_slice(self, slice_size):
+2 -2
View File
@@ -139,7 +139,7 @@ class LoRACompatibleConv(nn.Conv2d):
self._lora_scale = lora_scale
def _unfuse_lora(self):
if not (hasattr(self, "w_up") and hasattr(self, "w_down")):
if not (getattr(self, "w_up", None) is not None and getattr(self, "w_down", None) is not None):
return
fused_weight = self.weight.data
@@ -204,7 +204,7 @@ class LoRACompatibleLinear(nn.Linear):
self._lora_scale = lora_scale
def _unfuse_lora(self):
if not (hasattr(self, "w_up") and hasattr(self, "w_down")):
if not (getattr(self, "w_up", None) is not None and getattr(self, "w_down", None) is not None):
return
fused_weight = self.weight.data
+32 -22
View File
@@ -128,6 +128,31 @@ def load_state_dict(checkpoint_file: Union[str, os.PathLike], variant: Optional[
)
def load_model_dict_into_meta(model, state_dict, device=None, dtype=None, model_name_or_path=None):
    """Copy the entries of `state_dict` into `model`, one parameter at a time.

    Args:
        model: Object exposing `state_dict()`; its keys and tensor shapes define what is loadable.
        state_dict: Mapping of parameter name to tensor to load.
        device: Target device for the loaded tensors. Falls back to CPU when falsy.
        dtype: Target dtype for the loaded tensors. Falls back to `torch.float32` when falsy.
        model_name_or_path: Optional identifier, only used to make the error message more helpful.

    Returns:
        The list of keys found in `state_dict` that do not exist in `model.state_dict()`, in
        iteration order. Those entries are skipped rather than loaded.

    Raises:
        ValueError: If a checkpoint tensor's shape differs from the shape the model expects.
    """
    device = device or torch.device("cpu")
    dtype = dtype or torch.float32

    unexpected_keys = []
    empty_state_dict = model.state_dict()

    # Whether `set_module_tensor_to_device` accepts a `dtype` argument does not change between
    # iterations, so it is resolved once (on first use) instead of once per parameter.
    dtype_kwargs = None

    for param_name, param in state_dict.items():
        if param_name not in empty_state_dict:
            unexpected_keys.append(param_name)
            continue

        expected_shape = empty_state_dict[param_name].shape
        if expected_shape != param.shape:
            model_name_or_path_str = f"{model_name_or_path} " if model_name_or_path is not None else ""
            # Report the expected *shape*, not the whole tensor, so both sides of the message are comparable.
            raise ValueError(
                f"Cannot load {model_name_or_path_str}because {param_name} expected shape {expected_shape}, but got {param.shape}. If you want to instead overwrite randomly initialized weights, please make sure to pass both `low_cpu_mem_usage=False` and `ignore_mismatched_sizes=True`. For more information, see also: https://github.com/huggingface/diffusers/issues/1619#issuecomment-1345604389 as an example."
            )

        if dtype_kwargs is None:
            accepts_dtype = "dtype" in inspect.signature(set_module_tensor_to_device).parameters
            dtype_kwargs = {"dtype": dtype} if accepts_dtype else {}

        set_module_tensor_to_device(model, param_name, device, value=param, **dtype_kwargs)

    return unexpected_keys
def _load_state_dict_into_model(model_to_load, state_dict):
# Convert old format to new format if needed from a PyTorch state_dict
# copy state_dict so _load_from_state_dict can modify it
@@ -624,29 +649,14 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
" `low_cpu_mem_usage=False` and `device_map=None` if you want to randomly initialize"
" those weights or else make sure your checkpoint file is correct."
)
unexpected_keys = []
empty_state_dict = model.state_dict()
for param_name, param in state_dict.items():
accepts_dtype = "dtype" in set(
inspect.signature(set_module_tensor_to_device).parameters.keys()
)
if param_name not in empty_state_dict:
unexpected_keys.append(param_name)
continue
if empty_state_dict[param_name].shape != param.shape:
raise ValueError(
f"Cannot load {pretrained_model_name_or_path} because {param_name} expected shape {empty_state_dict[param_name]}, but got {param.shape}. If you want to instead overwrite randomly initialized weights, please make sure to pass both `low_cpu_mem_usage=False` and `ignore_mismatched_sizes=True`. For more information, see also: https://github.com/huggingface/diffusers/issues/1619#issuecomment-1345604389 as an example."
)
if accepts_dtype:
set_module_tensor_to_device(
model, param_name, param_device, value=param, dtype=torch_dtype
)
else:
set_module_tensor_to_device(model, param_name, param_device, value=param)
unexpected_keys = load_model_dict_into_meta(
model,
state_dict,
device=param_device,
dtype=torch_dtype,
model_name_or_path=pretrained_model_name_or_path,
)
if cls._keys_to_ignore_on_load_unexpected is not None:
for pat in cls._keys_to_ignore_on_load_unexpected:
+6 -4
View File
@@ -191,7 +191,9 @@ class PriorTransformer(ModelMixin, ConfigMixin):
return processors
# Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.set_attn_processor
def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
def set_attn_processor(
self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]], _remove_lora=False
):
r"""
Sets the attention processor to use to compute attention.
@@ -215,9 +217,9 @@ class PriorTransformer(ModelMixin, ConfigMixin):
def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
if hasattr(module, "set_processor"):
if not isinstance(processor, dict):
module.set_processor(processor)
module.set_processor(processor, _remove_lora=_remove_lora)
else:
module.set_processor(processor.pop(f"{name}.processor"))
module.set_processor(processor.pop(f"{name}.processor"), _remove_lora=_remove_lora)
for sub_name, child in module.named_children():
fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
@@ -239,7 +241,7 @@ class PriorTransformer(ModelMixin, ConfigMixin):
f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}"
)
self.set_attn_processor(processor)
self.set_attn_processor(processor, _remove_lora=True)
def forward(
self,
+6 -4
View File
@@ -613,7 +613,9 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
return processors
def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
def set_attn_processor(
self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]], _remove_lora=False
):
r"""
Sets the attention processor to use to compute attention.
@@ -637,9 +639,9 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
if hasattr(module, "set_processor"):
if not isinstance(processor, dict):
module.set_processor(processor)
module.set_processor(processor, _remove_lora=_remove_lora)
else:
module.set_processor(processor.pop(f"{name}.processor"))
module.set_processor(processor.pop(f"{name}.processor"), _remove_lora=_remove_lora)
for sub_name, child in module.named_children():
fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
@@ -660,7 +662,7 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}"
)
self.set_attn_processor(processor)
self.set_attn_processor(processor, _remove_lora=True)
def set_attention_slice(self, slice_size):
r"""
+6 -4
View File
@@ -366,7 +366,9 @@ class UNet3DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
fn_recursive_set_attention_slice(module, reversed_slice_size)
# Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.set_attn_processor
def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
def set_attn_processor(
self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]], _remove_lora=False
):
r"""
Sets the attention processor to use to compute attention.
@@ -390,9 +392,9 @@ class UNet3DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
if hasattr(module, "set_processor"):
if not isinstance(processor, dict):
module.set_processor(processor)
module.set_processor(processor, _remove_lora=_remove_lora)
else:
module.set_processor(processor.pop(f"{name}.processor"))
module.set_processor(processor.pop(f"{name}.processor"), _remove_lora=_remove_lora)
for sub_name, child in module.named_children():
fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
@@ -454,7 +456,7 @@ class UNet3DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}"
)
self.set_attn_processor(processor)
self.set_attn_processor(processor, _remove_lora=True)
def _set_gradient_checkpointing(self, module, value=False):
if isinstance(module, (CrossAttnDownBlock3D, DownBlock3D, CrossAttnUpBlock3D, UpBlock3D)):
@@ -100,6 +100,7 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -127,6 +127,7 @@ class AltDiffusionImg2ImgPipeline(
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -178,7 +178,7 @@ class AudioDiffusionPipeline(DiffusionPipeline):
self.scheduler.set_timesteps(steps)
step_generator = step_generator or generator
# For backwards compatibility
if type(self.unet.config.sample_size) == int:
if isinstance(self.unet.config.sample_size, int):
self.unet.config.sample_size = (self.unet.config.sample_size, self.unet.config.sample_size)
if noise is None:
noise = randn_tensor(
@@ -538,7 +538,9 @@ class AudioLDM2UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoad
return processors
# Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.set_attn_processor
def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
def set_attn_processor(
self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]], _remove_lora=False
):
r"""
Sets the attention processor to use to compute attention.
@@ -562,9 +564,9 @@ class AudioLDM2UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoad
def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
if hasattr(module, "set_processor"):
if not isinstance(processor, dict):
module.set_processor(processor)
module.set_processor(processor, _remove_lora=_remove_lora)
else:
module.set_processor(processor.pop(f"{name}.processor"))
module.set_processor(processor.pop(f"{name}.processor"), _remove_lora=_remove_lora)
for sub_name, child in module.named_children():
fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
@@ -586,7 +588,7 @@ class AudioLDM2UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoad
f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}"
)
self.set_attn_processor(processor)
self.set_attn_processor(processor, _remove_lora=True)
# Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.set_attention_slice
def set_attention_slice(self, slice_size):
@@ -125,6 +125,7 @@ class StableDiffusionControlNetPipeline(
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -149,6 +149,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -273,6 +273,7 @@ class StableDiffusionControlNetInpaintPipeline(
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
+5 -5
View File
@@ -1255,7 +1255,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
self._all_hooks = []
hook = None
for model_str in self.model_cpu_offload_seq.split("->"):
model = all_model_components.pop(model_str)
model = all_model_components.pop(model_str, None)
if not isinstance(model, torch.nn.Module):
continue
@@ -1293,10 +1293,10 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
def enable_sequential_cpu_offload(self, gpu_id: int = 0, device: Union[torch.device, str] = "cuda"):
r"""
Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
`torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
Note that offloading happens on a submodule basis. Memory savings are higher than with
Offloads all models to CPU using 🤗 Accelerate, significantly reducing memory usage. When called, the state
dicts of all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are saved to CPU
and then moved to `torch.device('meta')` and loaded to GPU only when their specific submodule has its `forward`
method called. Offloading happens on a submodule basis. Memory savings are higher than with
`enable_model_cpu_offload`, but performance is lower.
"""
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
@@ -1256,25 +1256,37 @@ def download_from_original_stable_diffusion_ckpt(
key_name_v2_1 = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
key_name_sd_xl_base = "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_proj.bias"
key_name_sd_xl_refiner = "conditioner.embedders.0.model.transformer.resblocks.9.mlp.c_proj.bias"
config_url = None
# model_type = "v1"
config_url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml"
if config_files is not None and "v1" in config_files:
original_config_file = config_files["v1"]
else:
config_url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml"
if key_name_v2_1 in checkpoint and checkpoint[key_name_v2_1].shape[-1] == 1024:
# model_type = "v2"
config_url = "https://raw.githubusercontent.com/Stability-AI/stablediffusion/main/configs/stable-diffusion/v2-inference-v.yaml"
if config_files is not None and "v2" in config_files:
original_config_file = config_files["v2"]
else:
config_url = "https://raw.githubusercontent.com/Stability-AI/stablediffusion/main/configs/stable-diffusion/v2-inference-v.yaml"
if global_step == 110000:
# v2.1 needs to upcast attention
upcast_attention = True
elif key_name_sd_xl_base in checkpoint:
# only base xl has two text embedders
config_url = "https://raw.githubusercontent.com/Stability-AI/generative-models/main/configs/inference/sd_xl_base.yaml"
if config_files is not None and "xl" in config_files:
original_config_file = config_files["xl"]
else:
config_url = "https://raw.githubusercontent.com/Stability-AI/generative-models/main/configs/inference/sd_xl_base.yaml"
elif key_name_sd_xl_refiner in checkpoint:
# only refiner xl has embedder and one text embedders
config_url = "https://raw.githubusercontent.com/Stability-AI/generative-models/main/configs/inference/sd_xl_refiner.yaml"
original_config_file = BytesIO(requests.get(config_url).content)
if config_files is not None and "xl_refiner" in config_files:
original_config_file = config_files["xl_refiner"]
else:
config_url = "https://raw.githubusercontent.com/Stability-AI/generative-models/main/configs/inference/sd_xl_refiner.yaml"
if config_url is not None:
original_config_file = BytesIO(requests.get(config_url).content)
original_config = OmegaConf.load(original_config_file)
@@ -101,6 +101,7 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -191,6 +191,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, TextualInversion
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -272,6 +272,7 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor", "inverse_scheduler"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -124,6 +124,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline):
"""
_optional_components = ["safety_checker", "feature_extractor"]
model_cpu_offload_seq = "text_encoder->unet->vae"
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -182,6 +182,7 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline):
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -66,6 +66,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
# we should give a descriptive message if the pipeline doesn't have one.
_optional_components = ["safety_checker"]
model_cpu_offload_seq = "image_encoder->unet->vae"
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -129,6 +129,7 @@ class StableDiffusionImg2ImgPipeline(
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -194,6 +194,7 @@ class StableDiffusionInpaintPipeline(
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -117,6 +117,7 @@ class StableDiffusionInpaintPipelineLegacy(
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -91,6 +91,7 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -82,6 +82,7 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoade
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -114,6 +114,7 @@ class StableDiffusionLDM3DPipeline(
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -68,6 +68,7 @@ class StableDiffusionModelEditingPipeline(DiffusionPipeline, TextualInversionLoa
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -80,6 +80,7 @@ class StableDiffusionPanoramaPipeline(DiffusionPipeline, TextualInversionLoaderM
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -95,6 +95,7 @@ class StableDiffusionParadigmsPipeline(
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -315,6 +315,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
"caption_processor",
"inverse_scheduler",
]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -119,6 +119,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, TextualInversionLoaderMixin)
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -92,6 +92,7 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline, TextualInversionLoaderMi
"""
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["watermarker", "safety_checker", "feature_extractor"]
_exclude_from_cpu_offload = ["safety_checker"]
def __init__(
self,
@@ -50,13 +50,26 @@ class SafetyConfig(object):
_dummy_objects = {}
_additional_imports = {}
_import_structure = {
"pipeline_output": ["StableDiffusionSafePipelineOutput"],
"pipeline_stable_diffusion_safe": ["StableDiffusionPipelineSafe"],
"safety_checker": ["StableDiffusionSafetyChecker"],
}
_import_structure = {}
_additional_imports.update({"SafetyConfig": SafetyConfig})
try:
if not (is_transformers_available() and is_torch_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from ...utils import dummy_torch_and_transformers_objects
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
else:
_import_structure.update(
{
"pipeline_output": ["StableDiffusionSafePipelineOutput"],
"pipeline_stable_diffusion_safe": ["StableDiffusionPipelineSafe"],
"safety_checker": ["StableDiffusionSafetyChecker"],
}
)
if TYPE_CHECKING:
try:
@@ -70,25 +83,16 @@ if TYPE_CHECKING:
from .safety_checker import SafeStableDiffusionSafetyChecker
else:
try:
if not (is_transformers_available() and is_torch_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from ...utils import dummy_torch_and_transformers_objects
import sys
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
sys.modules[__name__] = _LazyModule(
__name__,
globals()["__file__"],
_import_structure,
module_spec=__spec__,
)
else:
import sys
sys.modules[__name__] = _LazyModule(
__name__,
globals()["__file__"],
_import_structure,
module_spec=__spec__,
)
for name, value in _dummy_objects.items():
setattr(sys.modules[__name__], name, value)
for name, value in _additional_imports.items():
setattr(sys.modules[__name__], name, value)
for name, value in _dummy_objects.items():
setattr(sys.modules[__name__], name, value)
for name, value in _additional_imports.items():
setattr(sys.modules[__name__], name, value)
@@ -810,7 +810,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin, LoraLoad
num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
# 7.1 Apply denoising_end
if denoising_end is not None and type(denoising_end) == float and denoising_end > 0 and denoising_end < 1:
if denoising_end is not None and isinstance(denoising_end, float) and denoising_end > 0 and denoising_end < 1:
discrete_timestep_cutoff = int(
round(
self.scheduler.config.num_train_timesteps
@@ -885,7 +885,7 @@ class StableDiffusionXLImg2ImgPipeline(
# 5. Prepare timesteps
def denoising_value_valid(dnv):
return type(denoising_end) == float and 0 < dnv < 1
return isinstance(denoising_end, float) and 0 < dnv < 1
self.scheduler.set_timesteps(num_inference_steps, device=device)
timesteps, num_inference_steps = self.get_timesteps(
@@ -1120,7 +1120,7 @@ class StableDiffusionXLInpaintPipeline(
# 4. set timesteps
def denoising_value_valid(dnv):
return type(denoising_end) == float and 0 < dnv < 1
return isinstance(denoising_end, float) and 0 < dnv < 1
self.scheduler.set_timesteps(num_inference_steps, device=device)
timesteps, num_inference_steps = self.get_timesteps(
@@ -837,7 +837,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
# 11. Denoising loop
num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
if denoising_end is not None and type(denoising_end) == float and denoising_end > 0 and denoising_end < 1:
if denoising_end is not None and isinstance(denoising_end, float) and denoising_end > 0 and denoising_end < 1:
discrete_timestep_cutoff = int(
round(
self.scheduler.config.num_train_timesteps
@@ -162,7 +162,6 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline):
scheduler: KarrasDiffusionSchedulers,
safety_checker: StableDiffusionSafetyChecker,
feature_extractor: CLIPFeatureExtractor,
adapter_weights: Optional[List[float]] = None,
requires_safety_checker: bool = True,
):
super().__init__()
@@ -184,7 +183,7 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline):
)
if isinstance(adapter, (list, tuple)):
adapter = MultiAdapter(adapter, adapter_weights=adapter_weights)
adapter = MultiAdapter(adapter)
self.register_modules(
vae=vae,
@@ -727,9 +726,14 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline):
extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
# 7. Denoising loop
adapter_state = self.adapter(adapter_input)
for k, v in enumerate(adapter_state):
adapter_state[k] = v * adapter_conditioning_scale
if isinstance(self.adapter, MultiAdapter):
adapter_state = self.adapter(adapter_input, adapter_conditioning_scale)
for k, v in enumerate(adapter_state):
adapter_state[k] = v
else:
adapter_state = self.adapter(adapter_input)
for k, v in enumerate(adapter_state):
adapter_state[k] = v * adapter_conditioning_scale
if num_images_per_prompt > 1:
for k, v in enumerate(adapter_state):
adapter_state[k] = v.repeat(num_images_per_prompt, 1, 1, 1)
@@ -886,7 +886,7 @@ class StableDiffusionXLAdapterPipeline(
num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
# 7.1 Apply denoising_end
if denoising_end is not None and type(denoising_end) == float and denoising_end > 0 and denoising_end < 1:
if denoising_end is not None and isinstance(denoising_end, float) and denoising_end > 0 and denoising_end < 1:
discrete_timestep_cutoff = int(
round(
self.scheduler.config.num_train_timesteps
@@ -47,3 +47,5 @@ else:
_import_structure,
module_spec=__spec__,
)
for name, value in _dummy_objects.items():
setattr(sys.modules[__name__], name, value)
@@ -820,7 +820,9 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
return processors
def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
def set_attn_processor(
self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]], _remove_lora=False
):
r"""
Sets the attention processor to use to compute attention.
@@ -844,9 +846,9 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
if hasattr(module, "set_processor"):
if not isinstance(processor, dict):
module.set_processor(processor)
module.set_processor(processor, _remove_lora=_remove_lora)
else:
module.set_processor(processor.pop(f"{name}.processor"))
module.set_processor(processor.pop(f"{name}.processor"), _remove_lora=_remove_lora)
for sub_name, child in module.named_children():
fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
@@ -868,7 +870,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
f" {next(iter(self.attn_processors.values()))}"
)
self.set_attn_processor(processor)
self.set_attn_processor(processor, _remove_lora=True)
def set_attention_slice(self, slice_size):
r"""
@@ -51,3 +51,6 @@ else:
_import_structure,
module_spec=__spec__,
)
for name, value in _dummy_objects.items():
setattr(sys.modules[__name__], name, value)
@@ -41,7 +41,6 @@ if TYPE_CHECKING:
from .pipeline_wuerstchen import WuerstchenDecoderPipeline
from .pipeline_wuerstchen_combined import WuerstchenCombinedPipeline
from .pipeline_wuerstchen_prior import WuerstchenPriorPipeline
else:
import sys
@@ -51,3 +50,6 @@ else:
_import_structure,
module_spec=__spec__,
)
for name, value in _dummy_objects.items():
setattr(sys.modules[__name__], name, value)
@@ -330,7 +330,11 @@ class WuerstchenDecoderPipeline(DiffusionPipeline):
# 2. Encode caption
prompt_embeds, negative_prompt_embeds = self.encode_prompt(
prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
prompt,
device,
image_embeddings.size(0) * num_images_per_prompt,
do_classifier_free_guidance,
negative_prompt,
)
text_encoder_hidden_states = (
torch.cat([prompt_embeds, negative_prompt_embeds]) if negative_prompt_embeds is not None else prompt_embeds
@@ -154,6 +154,8 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
decoder_timesteps: Optional[List[float]] = None,
decoder_guidance_scale: float = 0.0,
negative_prompt: Optional[Union[str, List[str]]] = None,
prompt_embeds: Optional[torch.FloatTensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
num_images_per_prompt: int = 1,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None,
@@ -165,10 +167,17 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
The prompt or prompts to guide the image generation for the prior and decoder.
negative_prompt (`str` or `List[str]`, *optional*):
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
if `guidance_scale` is less than `1`).
prompt_embeds (`torch.FloatTensor`, *optional*):
Pre-generated text embeddings for the prior. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
Pre-generated negative text embeddings for the prior. Can be used to easily tweak text inputs, *e.g.*
prompt weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt`
input argument.
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
height (`int`, *optional*, defaults to 512):
@@ -221,13 +230,15 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
otherwise a `tuple`. When returning a tuple, the first element is a list with the generated images.
"""
prior_outputs = self.prior_pipe(
prompt=prompt,
prompt=prompt if prompt_embeds is None else None,
height=height,
width=width,
num_inference_steps=prior_num_inference_steps,
timesteps=prior_timesteps,
guidance_scale=prior_guidance_scale,
negative_prompt=negative_prompt,
negative_prompt=negative_prompt if negative_prompt_embeds is None else None,
prompt_embeds=prompt_embeds,
negative_prompt_embeds=negative_prompt_embeds,
num_images_per_prompt=num_images_per_prompt,
generator=generator,
latents=latents,
@@ -150,41 +150,57 @@ class WuerstchenPriorPipeline(DiffusionPipeline):
def encode_prompt(
self,
prompt,
device,
num_images_per_prompt,
do_classifier_free_guidance,
prompt=None,
negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
):
batch_size = len(prompt) if isinstance(prompt, list) else 1
# get prompt text embeddings
text_inputs = self.tokenizer(
prompt,
padding="max_length",
max_length=self.tokenizer.model_max_length,
truncation=True,
return_tensors="pt",
)
text_input_ids = text_inputs.input_ids
attention_mask = text_inputs.attention_mask
if prompt is not None and isinstance(prompt, str):
batch_size = 1
elif prompt is not None and isinstance(prompt, list):
batch_size = len(prompt)
else:
batch_size = prompt_embeds.shape[0]
untruncated_ids = self.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(text_input_ids, untruncated_ids):
removed_text = self.tokenizer.batch_decode(untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1])
logger.warning(
"The following part of your input was truncated because CLIP can only handle sequences up to"
f" {self.tokenizer.model_max_length} tokens: {removed_text}"
if prompt_embeds is None:
# get prompt text embeddings
text_inputs = self.tokenizer(
prompt,
padding="max_length",
max_length=self.tokenizer.model_max_length,
truncation=True,
return_tensors="pt",
)
text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length]
attention_mask = attention_mask[:, : self.tokenizer.model_max_length]
text_input_ids = text_inputs.input_ids
attention_mask = text_inputs.attention_mask
text_encoder_output = self.text_encoder(text_input_ids.to(device), attention_mask=attention_mask.to(device))
text_encoder_hidden_states = text_encoder_output.last_hidden_state
text_encoder_hidden_states = text_encoder_hidden_states.repeat_interleave(num_images_per_prompt, dim=0)
untruncated_ids = self.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
uncond_text_encoder_hidden_states = None
if do_classifier_free_guidance:
if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
text_input_ids, untruncated_ids
):
removed_text = self.tokenizer.batch_decode(
untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]
)
logger.warning(
"The following part of your input was truncated because CLIP can only handle sequences up to"
f" {self.tokenizer.model_max_length} tokens: {removed_text}"
)
text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length]
attention_mask = attention_mask[:, : self.tokenizer.model_max_length]
text_encoder_output = self.text_encoder(
text_input_ids.to(device), attention_mask=attention_mask.to(device)
)
prompt_embeds = text_encoder_output.last_hidden_state
prompt_embeds = prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
prompt_embeds = prompt_embeds.repeat_interleave(num_images_per_prompt, dim=0)
if negative_prompt_embeds is None and do_classifier_free_guidance:
uncond_tokens: List[str]
if negative_prompt is None:
uncond_tokens = [""] * batch_size
@@ -215,17 +231,17 @@ class WuerstchenPriorPipeline(DiffusionPipeline):
uncond_input.input_ids.to(device), attention_mask=uncond_input.attention_mask.to(device)
)
uncond_text_encoder_hidden_states = negative_prompt_embeds_text_encoder_output.last_hidden_state
negative_prompt_embeds = negative_prompt_embeds_text_encoder_output.last_hidden_state
if do_classifier_free_guidance:
# duplicate unconditional embeddings for each generation per prompt, using mps friendly method
seq_len = uncond_text_encoder_hidden_states.shape[1]
uncond_text_encoder_hidden_states = uncond_text_encoder_hidden_states.repeat(1, num_images_per_prompt, 1)
uncond_text_encoder_hidden_states = uncond_text_encoder_hidden_states.view(
batch_size * num_images_per_prompt, seq_len, -1
)
seq_len = negative_prompt_embeds.shape[1]
negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
# done duplicates
return text_encoder_hidden_states, uncond_text_encoder_hidden_states
return prompt_embeds, negative_prompt_embeds
def check_inputs(
self,
@@ -264,13 +280,15 @@ class WuerstchenPriorPipeline(DiffusionPipeline):
@replace_example_docstring(EXAMPLE_DOC_STRING)
def __call__(
self,
prompt: Union[str, List[str]] = None,
prompt: Optional[Union[str, List[str]]] = None,
height: int = 1024,
width: int = 1024,
num_inference_steps: int = 60,
timesteps: List[float] = None,
guidance_scale: float = 8.0,
negative_prompt: Optional[Union[str, List[str]]] = None,
prompt_embeds: Optional[torch.FloatTensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
num_images_per_prompt: Optional[int] = 1,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None,
@@ -304,6 +322,13 @@ class WuerstchenPriorPipeline(DiffusionPipeline):
negative_prompt (`str` or `List[str]`, *optional*):
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
if `decoder_guidance_scale` is less than `1`).
prompt_embeds (`torch.FloatTensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument.
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -345,7 +370,13 @@ class WuerstchenPriorPipeline(DiffusionPipeline):
# 2. Encode caption
prompt_embeds, negative_prompt_embeds = self.encode_prompt(
prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
prompt=prompt,
device=device,
num_images_per_prompt=num_images_per_prompt,
do_classifier_free_guidance=do_classifier_free_guidance,
negative_prompt=negative_prompt,
prompt_embeds=prompt_embeds,
negative_prompt_embeds=negative_prompt_embeds,
)
# For classifier free guidance, we need to do two forward passes.
+39 -1
View File
@@ -43,7 +43,7 @@ from diffusers.models.attention_processor import (
LoRAAttnProcessor2_0,
XFormersAttnProcessor,
)
from diffusers.utils.testing_utils import floats_tensor, require_torch_gpu, slow, torch_device
from diffusers.utils.testing_utils import floats_tensor, nightly, require_torch_gpu, slow, torch_device
def create_unet_lora_layers(unet: nn.Module):
@@ -1464,3 +1464,41 @@ class LoraIntegrationTests(unittest.TestCase):
expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])
self.assertTrue(np.allclose(images, expected, atol=1e-3))
@nightly
def test_sequential_fuse_unfuse(self):
# Regression test for repeated LoRA fuse/unfuse cycles: load and fuse a LoRA,
# record an output slice, cycle through two other LoRAs (fuse + unfuse each),
# then reload the first LoRA and check the output slice matches the recorded one.
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
# 1. round
# Reference run: fuse the first LoRA and generate with a generator seeded to 0.
pipe.load_lora_weights("Pclanglais/TintinIA")
pipe.fuse_lora()
generator = torch.Generator().manual_seed(0)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
# Keep only a small slice of the first image (last 3 entries along the two
# middle axes, last index of the final axis) as the reference.
image_slice = images[0, -3:, -3:, -1].flatten()
pipe.unfuse_lora()
# 2. round
# Fuse and immediately unfuse a second LoRA; no image is generated here.
pipe.load_lora_weights("ProomptEngineer/pe-balloon-diffusion-style")
pipe.fuse_lora()
pipe.unfuse_lora()
# 3. round
# Same fuse/unfuse cycle with a third LoRA.
pipe.load_lora_weights("ostris/crayon_style_lora_sdxl")
pipe.fuse_lora()
pipe.unfuse_lora()
# 4. back to 1st round
# Reload and fuse the first LoRA again, then repeat the reference generation
# with the same prompt, step count, and a freshly seeded (0) generator.
pipe.load_lora_weights("Pclanglais/TintinIA")
pipe.fuse_lora()
generator = torch.Generator().manual_seed(0)
images_2 = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
image_slice_2 = images_2[0, -3:, -3:, -1].flatten()
# The two slices must agree within an absolute tolerance of 1e-3.
self.assertTrue(np.allclose(image_slice, image_slice_2, atol=1e-3))
@@ -785,8 +785,8 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_custom_diffusion_weights.bin")))
torch.manual_seed(0)
new_model = self.model_class(**init_dict)
new_model.to(torch_device)
new_model.load_attn_procs(tmpdirname, weight_name="pytorch_custom_diffusion_weights.bin")
new_model.to(torch_device)
with torch.no_grad():
new_sample = new_model(**inputs_dict).sample
@@ -193,7 +193,7 @@ class ConsistencyModelPipelineSlowTests(unittest.TestCase):
return inputs
def get_fixed_latents(self, seed=0, device="cpu", dtype=torch.float32, shape=(1, 3, 64, 64)):
if type(device) == str:
if isinstance(device, str):
device = torch.device(device)
generator = torch.Generator(device=device).manual_seed(seed)
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
@@ -923,9 +923,7 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
pipe = StableDiffusionPipeline.from_pretrained(
"CompVis/stable-diffusion-v1-4", safety_checker=None, torch_dtype=torch.float16
)
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing(1)
@@ -216,7 +216,9 @@ class StableDiffusionMultiAdapterPipelineFastTests(AdapterTests, PipelineTesterM
return super().get_dummy_components("multi_adapter")
def get_dummy_inputs(self, device, seed=0):
return super().get_dummy_inputs(device, seed, num_images=2)
inputs = super().get_dummy_inputs(device, seed, num_images=2)
inputs["adapter_conditioning_scale"] = [0.5, 0.5]
return inputs
def test_stable_diffusion_adapter_default_case(self):
device = "cpu" # ensure determinism for the device-dependent torch.Generator
@@ -35,6 +35,8 @@ from diffusers.utils.testing_utils import (
load_image,
load_numpy,
nightly,
numpy_cosine_similarity_distance,
print_tensor_test,
require_torch_gpu,
slow,
torch_device,
@@ -182,7 +184,7 @@ class StableDiffusionImageVariationPipelineSlowTests(unittest.TestCase):
"generator": generator,
"num_inference_steps": 3,
"guidance_scale": 7.5,
"output_type": "numpy",
"output_type": "np",
}
return inputs
@@ -193,13 +195,17 @@ class StableDiffusionImageVariationPipelineSlowTests(unittest.TestCase):
sd_pipe = sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
inputs = self.get_inputs(torch_device)
generator_device = "cpu"
inputs = self.get_inputs(generator_device)
image = sd_pipe(**inputs).images
image_slice = image[0, -3:, -3:, -1].flatten()
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.84491, 0.90789, 0.75708, 0.78734, 0.83485, 0.70099, 0.66938, 0.68727, 0.61379])
assert np.abs(image_slice - expected_slice).max() < 6e-3
expected_slice = np.array([0.8449, 0.9079, 0.7571, 0.7873, 0.8348, 0.7010, 0.6694, 0.6873, 0.6138])
print_tensor_test(image_slice)
max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
assert max_diff < 1e-4
def test_stable_diffusion_img_variation_intermediate_state(self):
number_of_steps = 0
@@ -212,31 +218,36 @@ class StableDiffusionImageVariationPipelineSlowTests(unittest.TestCase):
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 64)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array(
[-0.1621, 0.2837, -0.7979, -0.1221, -1.3057, 0.7681, -2.1191, 0.0464, 1.6309]
)
expected_slice = np.array([-0.7974, -0.4343, -1.087, 0.04785, -1.327, 0.855, -2.148, -0.1725, 1.439])
max_diff = numpy_cosine_similarity_distance(latents_slice.flatten(), expected_slice)
assert max_diff < 1e-3
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
elif step == 2:
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 64)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([0.6299, 1.7500, 1.1992, -2.1582, -1.8994, 0.7334, -0.7090, 1.0137, 1.5273])
expected_slice = np.array([0.3232, 0.004883, 0.913, -1.084, 0.6143, -1.6875, -2.463, -0.439, -0.419])
max_diff = numpy_cosine_similarity_distance(latents_slice.flatten(), expected_slice)
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
assert max_diff < 1e-3
callback_fn.has_been_called = False
pipe = StableDiffusionImageVariationPipeline.from_pretrained(
"fusing/sd-image-variations-diffusers",
"lambdalabs/sd-image-variations-diffusers",
safety_checker=None,
torch_dtype=torch.float16,
)
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing()
for component in pipe.components.values():
if hasattr(component, "set_default_attn_processor"):
component.set_default_attn_processor()
inputs = self.get_inputs(torch_device, dtype=torch.float16)
generator_device = "cpu"
inputs = self.get_inputs(generator_device, dtype=torch.float16)
pipe(**inputs, callback=callback_fn, callback_steps=1)
assert callback_fn.has_been_called
assert number_of_steps == inputs["num_inference_steps"]
@@ -246,9 +257,8 @@ class StableDiffusionImageVariationPipelineSlowTests(unittest.TestCase):
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
model_id = "fusing/sd-image-variations-diffusers"
pipe = StableDiffusionImageVariationPipeline.from_pretrained(
model_id, safety_checker=None, torch_dtype=torch.float16
"lambdalabs/sd-image-variations-diffusers", safety_checker=None, torch_dtype=torch.float16
)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
@@ -109,7 +109,7 @@ class UniDiffuserPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
return inputs
def get_fixed_latents(self, device, seed=0):
if type(device) == str:
if isinstance(device, str):
device = torch.device(device)
generator = torch.Generator(device=device).manual_seed(seed)
# Hardcode the shapes for now.
@@ -545,7 +545,7 @@ class UniDiffuserPipelineSlowTests(unittest.TestCase):
return inputs
def get_fixed_latents(self, device, seed=0):
if type(device) == str:
if isinstance(device, str):
device = torch.device(device)
latent_device = torch.device("cpu")
generator = torch.Generator(device=latent_device).manual_seed(seed)
@@ -648,7 +648,7 @@ class UniDiffuserPipelineNightlyTests(unittest.TestCase):
return inputs
def get_fixed_latents(self, device, seed=0):
if type(device) == str:
if isinstance(device, str):
device = torch.device(device)
latent_device = torch.device("cpu")
generator = torch.Generator(device=latent_device).manual_seed(seed)