Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 965e52ce61 | |||
| b1e52794a2 | |||
| c3e3a1ee10 | |||
| 9cde56a729 | |||
| c63d7cdba0 | |||
| aa4634a7fa | |||
| 0709650e9d | |||
| a9829164f4 | |||
| 49c95178ad | |||
| c2f755bc62 | |||
| 2fb877b66c | |||
| ef9824f9f7 |
+1
-1
@@ -90,7 +90,7 @@ The following design principles are followed:
|
||||
- To integrate new model checkpoints whose general architecture can be classified as an architecture that already exists in Diffusers, the existing model architecture shall be adapted to make it work with the new checkpoint. One should only create a new file if the model architecture is fundamentally different.
|
||||
- Models should be designed to be easily extendable to future changes. This can be achieved by limiting public function arguments, configuration arguments, and "foreseeing" future changes, *e.g.* it is usually better to add `string` "...type" arguments that can easily be extended to new future types instead of boolean `is_..._type` arguments. Only the minimum amount of changes shall be made to existing architectures to make a new model checkpoint work.
|
||||
- The model design is a difficult trade-off between keeping code readable and concise and supporting many model checkpoints. For most parts of the modeling code, classes shall be adapted for new model checkpoints, while there are some exceptions where it is preferred to add new classes to make sure the code is kept concise and
|
||||
readable longterm, such as [UNet blocks](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d_blocks.py) and [Attention processors](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
readable longterm, such as [UNet blocks](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d_blocks.py) and [Attention processors](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
### Schedulers
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
<p align="center">
|
||||
<br>
|
||||
<img src="https://raw.githubusercontent.com/huggingface/diffusers/main/docs/source/en/imgs/diffusers_library.jpg" width="400"/>
|
||||
<img src="https://github.com/huggingface/diffusers/blob/main/docs/source/en/imgs/diffusers_library.jpg" width="400"/>
|
||||
<br>
|
||||
<p>
|
||||
<p align="center">
|
||||
|
||||
@@ -107,13 +107,13 @@ One cheeseburger monster coming up! Enjoy!
|
||||
|
||||
<Tip>
|
||||
|
||||
We also provide an end-to-end Kandinsky pipeline [`KandinskyCombinedPipeline`], which combines both the prior pipeline and text-to-image pipeline, and lets you perform inference in a single step. You can create the combined pipeline with the [`~AutoPipelineForText2Image.from_pretrained`] method
|
||||
We also provide an end-to-end Kandinsky pipeline [`KandinskyCombinedPipeline`], which combines both the prior pipeline and text-to-image pipeline, and lets you perform inference in a single step. You can create the combined pipeline with the [`~AutoPipelineForTextToImage.from_pretrained`] method
|
||||
|
||||
```python
|
||||
from diffusers import AutoPipelineForText2Image
|
||||
from diffusers import AutoPipelineForTextToImage
|
||||
import torch
|
||||
|
||||
pipe = AutoPipelineForText2Image.from_pretrained(
|
||||
pipe = AutoPipelineForTextToImage.from_pretrained(
|
||||
"kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16
|
||||
)
|
||||
pipe.enable_model_cpu_offload()
|
||||
|
||||
@@ -69,7 +69,7 @@ Next, create the adapter pipeline
|
||||
import torch
|
||||
from diffusers import StableDiffusionAdapterPipeline, T2IAdapter
|
||||
|
||||
adapter = T2IAdapter.from_pretrained("TencentARC/t2iadapter_color_sd14v1", torch_dtype=torch.float16)
|
||||
adapter = T2IAdapter.from_pretrained("TencentARC/t2iadapter_color_sd14v1")
|
||||
pipe = StableDiffusionAdapterPipeline.from_pretrained(
|
||||
"CompVis/stable-diffusion-v1-4",
|
||||
adapter=adapter,
|
||||
|
||||
@@ -30,8 +30,8 @@ Make sure to check out the Stable Diffusion [Tips](overview#tips) section to lea
|
||||
- all
|
||||
- __call__
|
||||
|
||||
## LDM3DPipelineOutput
|
||||
## StableDiffusionPipelineOutput
|
||||
|
||||
[[autodoc]] pipelines.stable_diffusion.pipeline_stable_diffusion_ldm3d.LDM3DPipelineOutput
|
||||
[[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput
|
||||
- all
|
||||
- __call__
|
||||
|
||||
@@ -90,7 +90,7 @@ The following design principles are followed:
|
||||
- To integrate new model checkpoints whose general architecture can be classified as an architecture that already exists in Diffusers, the existing model architecture shall be adapted to make it work with the new checkpoint. One should only create a new file if the model architecture is fundamentally different.
|
||||
- Models should be designed to be easily extendable to future changes. This can be achieved by limiting public function arguments, configuration arguments, and "foreseeing" future changes, *e.g.* it is usually better to add `string` "...type" arguments that can easily be extended to new future types instead of boolean `is_..._type` arguments. Only the minimum amount of changes shall be made to existing architectures to make a new model checkpoint work.
|
||||
- The model design is a difficult trade-off between keeping code readable and concise and supporting many model checkpoints. For most parts of the modeling code, classes shall be adapted for new model checkpoints, while there are some exceptions where it is preferred to add new classes to make sure the code is kept concise and
|
||||
readable longterm, such as [UNet blocks](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d_blocks.py) and [Attention processors](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
readable longterm, such as [UNet blocks](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/unet_2d_blocks.py) and [Attention processors](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
### Schedulers
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ specific language governing permissions and limitations under the License.
|
||||
-->
|
||||
|
||||
|
||||
# How to use ONNX Runtime for inference
|
||||
# How to use the ONNX Runtime for inference
|
||||
|
||||
🤗 [Optimum](https://github.com/huggingface/optimum) provides a Stable Diffusion pipeline compatible with ONNX Runtime.
|
||||
|
||||
@@ -27,7 +27,7 @@ pip install optimum["onnxruntime"]
|
||||
|
||||
### Inference
|
||||
|
||||
To load an ONNX model and run inference with ONNX Runtime, you need to replace [`StableDiffusionPipeline`] with `ORTStableDiffusionPipeline`. In case you want to load a PyTorch model and convert it to the ONNX format on-the-fly, you can set `export=True`.
|
||||
To load an ONNX model and run inference with the ONNX Runtime, you need to replace [`StableDiffusionPipeline`] with `ORTStableDiffusionPipeline`. In case you want to load a PyTorch model and convert it to the ONNX format on-the-fly, you can set `export=True`.
|
||||
|
||||
```python
|
||||
from optimum.onnxruntime import ORTStableDiffusionPipeline
|
||||
|
||||
@@ -327,7 +327,3 @@ image = pipe(prompt, num_inference_steps=20, generator=generator, image=control_
|
||||
|
||||
image.save("./output.png")
|
||||
```
|
||||
|
||||
## Stable Diffusion XL
|
||||
|
||||
Training with [Stable Diffusion XL](https://huggingface.co/papers/2307.01952) is also supported via the `train_controlnet_sdxl.py` script. Please refer to the docs [here](https://github.com/huggingface/diffusers/blob/main/examples/controlnet/README_sdxl.md).
|
||||
@@ -212,4 +212,4 @@ If you're looking for some interesting ways to use the InstructPix2Pix training
|
||||
|
||||
## Stable Diffusion XL
|
||||
|
||||
Training with [Stable Diffusion XL](https://huggingface.co/papers/2307.01952) is also supported via the `train_instruct_pix2pix_sdxl.py` script. Please refer to the docs [here](https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/README_sdxl.md).
|
||||
We support fine-tuning of the UNet shipped in [Stable Diffusion XL](https://huggingface.co/papers/2307.01952) with DreamBooth and LoRA via the `train_dreambooth_lora_sdxl.py` script. Please refer to the docs [here](https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/README_sdxl.md).
|
||||
@@ -40,7 +40,6 @@ Unless otherwise mentioned, these are techniques that work with existing models
|
||||
12. [Custom Diffusion](#custom-diffusion)
|
||||
13. [Model Editing](#model-editing)
|
||||
14. [DiffEdit](#diffedit)
|
||||
15. [T2I-Adapter](#t2i-adapter)
|
||||
|
||||
For convenience, we provide a table to denote which methods are inference-only and which require fine-tuning/training.
|
||||
|
||||
|
||||
@@ -423,7 +423,7 @@ class ComposableStableDiffusionPipeline(DiffusionPipeline):
|
||||
num_inference_steps (`int`, *optional*, defaults to 50):
|
||||
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
||||
expense of slower inference.
|
||||
guidance_scale (`float`, *optional*, defaults to 5.0):
|
||||
guidance_scale (`float`, *optional*, defaults to 7.5):
|
||||
Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
|
||||
`guidance_scale` is defined as `w` of equation 2. of [Imagen
|
||||
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
|
||||
|
||||
@@ -967,7 +967,7 @@ class StableDiffusionLongPromptWeightingPipeline(
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Returns:
|
||||
`None` if cancelled by `is_cancelled_callback`,
|
||||
@@ -1202,7 +1202,7 @@ class StableDiffusionLongPromptWeightingPipeline(
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Returns:
|
||||
`None` if cancelled by `is_cancelled_callback`,
|
||||
@@ -1316,7 +1316,7 @@ class StableDiffusionLongPromptWeightingPipeline(
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Returns:
|
||||
`None` if cancelled by `is_cancelled_callback`,
|
||||
@@ -1437,7 +1437,7 @@ class StableDiffusionLongPromptWeightingPipeline(
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Returns:
|
||||
`None` if cancelled by `is_cancelled_callback`,
|
||||
|
||||
@@ -777,7 +777,7 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
|
||||
The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
|
||||
to the residual in the original unet.
|
||||
|
||||
@@ -912,7 +912,7 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline):
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
|
||||
The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
|
||||
to the residual in the original unet.
|
||||
|
||||
@@ -911,7 +911,7 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline):
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
|
||||
The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
|
||||
to the residual in the original unet.
|
||||
|
||||
@@ -201,7 +201,7 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
|
||||
The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
|
||||
to the residual in the original unet. If multiple ControlNets are specified in init, you can set the
|
||||
|
||||
@@ -729,7 +729,7 @@ class StableDiffusionIPEXPipeline(DiffusionPipeline):
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttnProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
|
||||
@@ -249,7 +249,7 @@ class StableDiffusionReferencePipeline(StableDiffusionPipeline):
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
guidance_rescale (`float`, *optional*, defaults to 0.7):
|
||||
Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
|
||||
Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# ControlNet training example for Stable Diffusion XL (SDXL)
|
||||
# DreamBooth training example for Stable Diffusion XL (SDXL)
|
||||
|
||||
The `train_controlnet_sdxl.py` script shows how to implement the ControlNet training procedure and adapt it for [Stable Diffusion XL](https://huggingface.co/papers/2307.01952).
|
||||
The `train_controlnet_sdxl.py` script shows how to implement the training procedure and adapt it for [Stable Diffusion XL](https://huggingface.co/papers/2307.01952).
|
||||
|
||||
## Running locally with PyTorch
|
||||
|
||||
@@ -128,4 +128,4 @@ image.save("./output.png")
|
||||
|
||||
### Specifying a better VAE
|
||||
|
||||
SDXL's VAE is known to suffer from numerical instability issues. This is why we also expose a CLI argument namely `--pretrained_vae_model_name_or_path` that lets you specify the location of a better VAE (such as [this one](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix)).
|
||||
SDXL's VAE is known to suffer from numerical instability issues. This is why we also expose a CLI argument namely `--pretrained_vae_model_name_or_path` that lets you specify the location of a better VAE (such as [this one](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix)).
|
||||
@@ -56,7 +56,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -59,7 +59,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -210,7 +210,7 @@ def save_model_card(repo_id: str, image_logs=None, base_model=str, repo_folder=N
|
||||
|
||||
yaml = f"""
|
||||
---
|
||||
license: openrail++
|
||||
license: creativeml-openrail-m
|
||||
base_model: {base_model}
|
||||
tags:
|
||||
- stable-diffusion-xl
|
||||
@@ -227,7 +227,12 @@ inference: true
|
||||
These are controlnet weights trained on {base_model} with new type of conditioning.
|
||||
{img_str}
|
||||
"""
|
||||
model_card += """
|
||||
|
||||
## License
|
||||
|
||||
[SDXL 1.0 License](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/LICENSE.md)
|
||||
"""
|
||||
with open(os.path.join(repo_folder, "README.md"), "w") as f:
|
||||
f.write(yaml + model_card)
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ from diffusers.utils.import_utils import is_xformers_available
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
|
||||
import argparse
|
||||
import copy
|
||||
import gc
|
||||
import hashlib
|
||||
import itertools
|
||||
@@ -60,7 +59,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -592,14 +591,14 @@ class DreamBoothDataset(Dataset):
|
||||
size=512,
|
||||
center_crop=False,
|
||||
encoder_hidden_states=None,
|
||||
class_prompt_encoder_hidden_states=None,
|
||||
instance_prompt_encoder_hidden_states=None,
|
||||
tokenizer_max_length=None,
|
||||
):
|
||||
self.size = size
|
||||
self.center_crop = center_crop
|
||||
self.tokenizer = tokenizer
|
||||
self.encoder_hidden_states = encoder_hidden_states
|
||||
self.class_prompt_encoder_hidden_states = class_prompt_encoder_hidden_states
|
||||
self.instance_prompt_encoder_hidden_states = instance_prompt_encoder_hidden_states
|
||||
self.tokenizer_max_length = tokenizer_max_length
|
||||
|
||||
self.instance_data_root = Path(instance_data_root)
|
||||
@@ -662,8 +661,8 @@ class DreamBoothDataset(Dataset):
|
||||
class_image = class_image.convert("RGB")
|
||||
example["class_images"] = self.image_transforms(class_image)
|
||||
|
||||
if self.class_prompt_encoder_hidden_states is not None:
|
||||
example["class_prompt_ids"] = self.class_prompt_encoder_hidden_states
|
||||
if self.instance_prompt_encoder_hidden_states is not None:
|
||||
example["class_prompt_ids"] = self.instance_prompt_encoder_hidden_states
|
||||
else:
|
||||
class_text_inputs = tokenize_prompt(
|
||||
self.tokenizer, self.class_prompt, tokenizer_max_length=self.tokenizer_max_length
|
||||
@@ -1027,10 +1026,10 @@ def main(args):
|
||||
else:
|
||||
validation_prompt_encoder_hidden_states = None
|
||||
|
||||
if args.class_prompt is not None:
|
||||
pre_computed_class_prompt_encoder_hidden_states = compute_text_embeddings(args.class_prompt)
|
||||
if args.instance_prompt is not None:
|
||||
pre_computed_instance_prompt_encoder_hidden_states = compute_text_embeddings(args.instance_prompt)
|
||||
else:
|
||||
pre_computed_class_prompt_encoder_hidden_states = None
|
||||
pre_computed_instance_prompt_encoder_hidden_states = None
|
||||
|
||||
text_encoder = None
|
||||
tokenizer = None
|
||||
@@ -1041,7 +1040,7 @@ def main(args):
|
||||
pre_computed_encoder_hidden_states = None
|
||||
validation_prompt_encoder_hidden_states = None
|
||||
validation_prompt_negative_prompt_embeds = None
|
||||
pre_computed_class_prompt_encoder_hidden_states = None
|
||||
pre_computed_instance_prompt_encoder_hidden_states = None
|
||||
|
||||
# Dataset and DataLoaders creation:
|
||||
train_dataset = DreamBoothDataset(
|
||||
@@ -1054,7 +1053,7 @@ def main(args):
|
||||
size=args.resolution,
|
||||
center_crop=args.center_crop,
|
||||
encoder_hidden_states=pre_computed_encoder_hidden_states,
|
||||
class_prompt_encoder_hidden_states=pre_computed_class_prompt_encoder_hidden_states,
|
||||
instance_prompt_encoder_hidden_states=pre_computed_instance_prompt_encoder_hidden_states,
|
||||
tokenizer_max_length=args.tokenizer_max_length,
|
||||
)
|
||||
|
||||
@@ -1117,7 +1116,7 @@ def main(args):
|
||||
# We need to initialize the trackers we use, and also store our configuration.
|
||||
# The trackers initializes automatically on the main process.
|
||||
if accelerator.is_main_process:
|
||||
tracker_config = vars(copy.deepcopy(args))
|
||||
tracker_config = vars(args)
|
||||
tracker_config.pop("validation_images")
|
||||
accelerator.init_trackers("dreambooth", config=tracker_config)
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ from diffusers.utils import check_min_version
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
# Cache compiled models across invocations of this script.
|
||||
cc.initialize_cache(os.path.expanduser("~/.cache/jax/compilation_cache"))
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
|
||||
import argparse
|
||||
import copy
|
||||
import gc
|
||||
import hashlib
|
||||
import itertools
|
||||
@@ -70,7 +69,7 @@ from diffusers.utils.import_utils import is_xformers_available
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -492,14 +491,14 @@ class DreamBoothDataset(Dataset):
|
||||
size=512,
|
||||
center_crop=False,
|
||||
encoder_hidden_states=None,
|
||||
class_prompt_encoder_hidden_states=None,
|
||||
instance_prompt_encoder_hidden_states=None,
|
||||
tokenizer_max_length=None,
|
||||
):
|
||||
self.size = size
|
||||
self.center_crop = center_crop
|
||||
self.tokenizer = tokenizer
|
||||
self.encoder_hidden_states = encoder_hidden_states
|
||||
self.class_prompt_encoder_hidden_states = class_prompt_encoder_hidden_states
|
||||
self.instance_prompt_encoder_hidden_states = instance_prompt_encoder_hidden_states
|
||||
self.tokenizer_max_length = tokenizer_max_length
|
||||
|
||||
self.instance_data_root = Path(instance_data_root)
|
||||
@@ -562,8 +561,8 @@ class DreamBoothDataset(Dataset):
|
||||
class_image = class_image.convert("RGB")
|
||||
example["class_images"] = self.image_transforms(class_image)
|
||||
|
||||
if self.class_prompt_encoder_hidden_states is not None:
|
||||
example["class_prompt_ids"] = self.class_prompt_encoder_hidden_states
|
||||
if self.instance_prompt_encoder_hidden_states is not None:
|
||||
example["class_prompt_ids"] = self.instance_prompt_encoder_hidden_states
|
||||
else:
|
||||
class_text_inputs = tokenize_prompt(
|
||||
self.tokenizer, self.class_prompt, tokenizer_max_length=self.tokenizer_max_length
|
||||
@@ -993,10 +992,10 @@ def main(args):
|
||||
else:
|
||||
validation_prompt_encoder_hidden_states = None
|
||||
|
||||
if args.class_prompt is not None:
|
||||
pre_computed_class_prompt_encoder_hidden_states = compute_text_embeddings(args.instance_prompt)
|
||||
if args.instance_prompt is not None:
|
||||
pre_computed_instance_prompt_encoder_hidden_states = compute_text_embeddings(args.instance_prompt)
|
||||
else:
|
||||
pre_computed_class_prompt_encoder_hidden_states = None
|
||||
pre_computed_instance_prompt_encoder_hidden_states = None
|
||||
|
||||
text_encoder = None
|
||||
tokenizer = None
|
||||
@@ -1007,7 +1006,7 @@ def main(args):
|
||||
pre_computed_encoder_hidden_states = None
|
||||
validation_prompt_encoder_hidden_states = None
|
||||
validation_prompt_negative_prompt_embeds = None
|
||||
pre_computed_class_prompt_encoder_hidden_states = None
|
||||
pre_computed_instance_prompt_encoder_hidden_states = None
|
||||
|
||||
# Dataset and DataLoaders creation:
|
||||
train_dataset = DreamBoothDataset(
|
||||
@@ -1020,7 +1019,7 @@ def main(args):
|
||||
size=args.resolution,
|
||||
center_crop=args.center_crop,
|
||||
encoder_hidden_states=pre_computed_encoder_hidden_states,
|
||||
class_prompt_encoder_hidden_states=pre_computed_class_prompt_encoder_hidden_states,
|
||||
instance_prompt_encoder_hidden_states=pre_computed_instance_prompt_encoder_hidden_states,
|
||||
tokenizer_max_length=args.tokenizer_max_length,
|
||||
)
|
||||
|
||||
@@ -1068,7 +1067,7 @@ def main(args):
|
||||
# We need to initialize the trackers we use, and also store our configuration.
|
||||
# The trackers initializes automatically on the main process.
|
||||
if accelerator.is_main_process:
|
||||
tracker_config = vars(copy.deepcopy(args))
|
||||
tracker_config = vars(args)
|
||||
tracker_config.pop("validation_images")
|
||||
accelerator.init_trackers("dreambooth-lora", config=tracker_config)
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ from diffusers.utils.import_utils import is_xformers_available
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -73,7 +73,7 @@ def save_model_card(
|
||||
|
||||
yaml = f"""
|
||||
---
|
||||
license: openrail++
|
||||
license: creativeml-openrail-m
|
||||
base_model: {base_model}
|
||||
instance_prompt: {prompt}
|
||||
tags:
|
||||
@@ -94,6 +94,10 @@ These are LoRA adaption weights for {base_model}. The weights were trained on {p
|
||||
LoRA for the text encoder was enabled: {train_text_encoder}.
|
||||
|
||||
Special VAE used for training: {vae_path}.
|
||||
|
||||
## License
|
||||
|
||||
[SDXL 1.0 License](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/LICENSE.md)
|
||||
"""
|
||||
with open(os.path.join(repo_folder, "README.md"), "w") as f:
|
||||
f.write(yaml + model_card)
|
||||
|
||||
@@ -4,9 +4,9 @@
|
||||
|
||||
[Stable Diffusion XL](https://huggingface.co/papers/2307.01952) (or SDXL) is the latest image generation model that is tailored towards more photorealistic outputs with more detailed imagery and composition compared to previous SD models. It leverages a three times larger UNet backbone. The increase of model parameters is mainly due to more attention blocks and a larger cross-attention context as SDXL uses a second text encoder.
|
||||
|
||||
The `train_instruct_pix2pix_sdxl.py` script shows how to implement the training procedure and adapt it for Stable Diffusion XL.
|
||||
The `train_instruct_pix2pix_xl.py` script shows how to implement the training procedure and adapt it for Stable Diffusion XL.
|
||||
|
||||
***Disclaimer: Even though `train_instruct_pix2pix_sdxl.py` implements the InstructPix2Pix
|
||||
***Disclaimer: Even though `train_instruct_pix2pix_xl.py` implements the InstructPix2Pix
|
||||
training procedure while being faithful to the [original implementation](https://github.com/timothybrooks/instruct-pix2pix) we have only tested it on a [small-scale dataset](https://huggingface.co/datasets/fusing/instructpix2pix-1000-samples). This can impact the end results. For better results, we recommend longer training runs with a larger dataset. [Here](https://huggingface.co/datasets/timbrooks/instructpix2pix-clip-filtered) you can find a large dataset for InstructPix2Pix training.***
|
||||
|
||||
## Running locally with PyTorch
|
||||
@@ -33,7 +33,7 @@ export DATASET_ID="fusing/instructpix2pix-1000-samples"
|
||||
Now, we can launch training:
|
||||
|
||||
```bash
|
||||
python train_instruct_pix2pix_sdxl.py \
|
||||
python train_instruct_pix2pix_xl.py \
|
||||
--pretrained_model_name_or_path=$MODEL_NAME \
|
||||
--dataset_name=$DATASET_ID \
|
||||
--enable_xformers_memory_efficient_attention \
|
||||
@@ -50,7 +50,7 @@ Additionally, we support performing validation inference to monitor training pro
|
||||
with Weights and Biases. You can enable this feature with `report_to="wandb"`:
|
||||
|
||||
```bash
|
||||
python train_instruct_pix2pix_sdxl.py \
|
||||
python train_instruct_pix2pix_xl.py \
|
||||
--pretrained_model_name_or_path=stabilityai/stable-diffusion-xl-base-1.0 \
|
||||
--dataset_name=$DATASET_ID \
|
||||
--use_ema \
|
||||
@@ -146,48 +146,3 @@ Particularly, `image_guidance_scale` and `guidance_scale` can have a profound im
|
||||
on the generated ("edited") image (see [here](https://twitter.com/RisingSayak/status/1628392199196151808?s=20) for an example).
|
||||
|
||||
If you're looking for some interesting ways to use the InstructPix2Pix training methodology, we welcome you to check out this blog post: [Instruction-tuning Stable Diffusion with InstructPix2Pix](https://huggingface.co/blog/instruction-tuning-sd).
|
||||
|
||||
## Compare between SD and SDXL
|
||||
|
||||
We aim to understand the differences resulting from the use of SD-1.5 and SDXL-0.9 as pretrained models. To achieve this, we trained on the [small toy dataset](https://huggingface.co/datasets/fusing/instructpix2pix-1000-samples) using both of these pretrained models. The training script is as follows:
|
||||
|
||||
```bash
|
||||
export MODEL_NAME="runwayml/stable-diffusion-v1-5" or "stabilityai/stable-diffusion-xl-base-0.9"
|
||||
export DATASET_ID="fusing/instructpix2pix-1000-samples"
|
||||
|
||||
CUDA_VISIBLE_DEVICES=1 python train_instruct_pix2pix.py \
|
||||
--pretrained_model_name_or_path=$MODEL_NAME \
|
||||
--dataset_name=$DATASET_ID \
|
||||
--use_ema \
|
||||
--enable_xformers_memory_efficient_attention \
|
||||
--resolution=512 --random_flip \
|
||||
--train_batch_size=4 --gradient_accumulation_steps=4 --gradient_checkpointing \
|
||||
--max_train_steps=15000 \
|
||||
--checkpointing_steps=5000 --checkpoints_total_limit=1 \
|
||||
--learning_rate=5e-05 --lr_warmup_steps=0 \
|
||||
--conditioning_dropout_prob=0.05 \
|
||||
--seed=42 \
|
||||
--val_image_url="https://datasets-server.huggingface.co/assets/fusing/instructpix2pix-1000-samples/--/fusing--instructpix2pix-1000-samples/train/23/input_image/image.jpg" \
|
||||
--validation_prompt="make it in Japan" \
|
||||
--report_to=wandb
|
||||
```
|
||||
|
||||
We discovered that compared to training with SD-1.5 as the pretrained model, SDXL-0.9 results in a lower training loss value (SD-1.5 yields 0.0599, SDXL scores 0.0254). Moreover, from a visual perspective, the results obtained using SDXL demonstrated fewer artifacts and a richer detail. Notably, SDXL starts to preserve the structure of the original image earlier on.
|
||||
|
||||
The following two GIFs provide intuitive visual results. We observed, for each step, what kind of results could be achieved using the image
|
||||
<p align="center">
|
||||
<img src="https://datasets-server.huggingface.co/assets/fusing/instructpix2pix-1000-samples/--/fusing--instructpix2pix-1000-samples/train/23/input_image/image.jpg" alt="input for make it Japan" width=600/>
|
||||
</p>
|
||||
with "make it in Japan” as the prompt. It can be seen that SDXL starts preserving the details of the original image earlier, resulting in higher fidelity outcomes sooner.
|
||||
|
||||
* SD-1.5: https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sd_ip2p_training_val_img_progress.gif
|
||||
|
||||
<p align="center">
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sd_ip2p_training_val_img_progress.gif" alt="input for make it Japan" width=600/>
|
||||
</p>
|
||||
|
||||
* SDXL: https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl_ip2p_training_val_img_progress.gif
|
||||
|
||||
<p align="center">
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl_ip2p_training_val_img_progress.gif" alt="input for make it Japan" width=600/>
|
||||
</p>
|
||||
|
||||
@@ -52,7 +52,7 @@ from diffusers.utils.import_utils import is_xformers_available
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
|
||||
+1
-1
@@ -55,7 +55,7 @@ from diffusers.utils.import_utils import is_xformers_available
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
@@ -54,7 +54,7 @@ if is_wandb_available():
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ from diffusers.utils import check_min_version
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@ from diffusers.utils.import_utils import is_xformers_available
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ else:
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -56,7 +56,7 @@ else:
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ from diffusers.utils.import_utils import is_xformers_available
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.20.0.dev0")
|
||||
check_min_version("0.19.0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
|
||||
@@ -233,7 +233,7 @@ install_requires = [
|
||||
|
||||
setup(
|
||||
name="diffusers",
|
||||
version="0.20.0.dev0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
version="0.19.2", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
description="Diffusers",
|
||||
long_description=open("README.md", "r", encoding="utf-8").read(),
|
||||
long_description_content_type="text/markdown",
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__version__ = "0.20.0.dev0"
|
||||
__version__ = "0.19.2"
|
||||
|
||||
from .configuration_utils import ConfigMixin
|
||||
from .utils import (
|
||||
|
||||
@@ -189,7 +189,7 @@ class UNet2DConditionLoadersMixin:
|
||||
r"""
|
||||
Load pretrained attention processor layers into [`UNet2DConditionModel`]. Attention processor layers have to be
|
||||
defined in
|
||||
[`attention_processor.py`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py)
|
||||
[`cross_attention.py`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py)
|
||||
and be a `torch.nn.Module` class.
|
||||
|
||||
Parameters:
|
||||
@@ -507,6 +507,7 @@ class UNet2DConditionLoadersMixin:
|
||||
weight_name: str = None,
|
||||
save_function: Callable = None,
|
||||
safe_serialization: bool = False,
|
||||
**kwargs,
|
||||
):
|
||||
r"""
|
||||
Save an attention processor to a directory so that it can be reloaded using the
|
||||
@@ -530,6 +531,12 @@ class UNet2DConditionLoadersMixin:
|
||||
CustomDiffusionXFormersAttnProcessor,
|
||||
)
|
||||
|
||||
weight_name = weight_name or deprecate(
|
||||
"weights_name",
|
||||
"0.20.0",
|
||||
"`weights_name` is deprecated, please use `weight_name` instead.",
|
||||
take_from=kwargs,
|
||||
)
|
||||
if os.path.isfile(save_directory):
|
||||
logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
|
||||
return
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
# Copyright 2023 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from ..utils import deprecate
|
||||
from .attention_processor import ( # noqa: F401
|
||||
Attention,
|
||||
AttentionProcessor,
|
||||
AttnAddedKVProcessor,
|
||||
AttnProcessor2_0,
|
||||
LoRAAttnProcessor,
|
||||
LoRALinearLayer,
|
||||
LoRAXFormersAttnProcessor,
|
||||
SlicedAttnAddedKVProcessor,
|
||||
SlicedAttnProcessor,
|
||||
XFormersAttnProcessor,
|
||||
)
|
||||
from .attention_processor import AttnProcessor as AttnProcessorRename # noqa: F401
|
||||
|
||||
|
||||
deprecate(
|
||||
"cross_attention",
|
||||
"0.20.0",
|
||||
"Importing from cross_attention is deprecated. Please import from diffusers.models.attention_processor instead.",
|
||||
standard_warn=False,
|
||||
)
|
||||
|
||||
|
||||
AttnProcessor = AttentionProcessor
|
||||
|
||||
|
||||
class CrossAttention(Attention):
|
||||
def __init__(self, *args, **kwargs):
|
||||
deprecation_message = f"{self.__class__.__name__} is deprecated and will be removed in `0.20.0`. Please use `from diffusers.models.attention_processor import {''.join(self.__class__.__name__.split('Cross'))} instead."
|
||||
deprecate("cross_attention", "0.20.0", deprecation_message, standard_warn=False)
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class CrossAttnProcessor(AttnProcessorRename):
|
||||
def __init__(self, *args, **kwargs):
|
||||
deprecation_message = f"{self.__class__.__name__} is deprecated and will be removed in `0.20.0`. Please use `from diffusers.models.attention_processor import {''.join(self.__class__.__name__.split('Cross'))} instead."
|
||||
deprecate("cross_attention", "0.20.0", deprecation_message, standard_warn=False)
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class LoRACrossAttnProcessor(LoRAAttnProcessor):
|
||||
def __init__(self, *args, **kwargs):
|
||||
deprecation_message = f"{self.__class__.__name__} is deprecated and will be removed in `0.20.0`. Please use `from diffusers.models.attention_processor import {''.join(self.__class__.__name__.split('Cross'))} instead."
|
||||
deprecate("cross_attention", "0.20.0", deprecation_message, standard_warn=False)
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class CrossAttnAddedKVProcessor(AttnAddedKVProcessor):
|
||||
def __init__(self, *args, **kwargs):
|
||||
deprecation_message = f"{self.__class__.__name__} is deprecated and will be removed in `0.20.0`. Please use `from diffusers.models.attention_processor import {''.join(self.__class__.__name__.split('Cross'))} instead."
|
||||
deprecate("cross_attention", "0.20.0", deprecation_message, standard_warn=False)
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class XFormersCrossAttnProcessor(XFormersAttnProcessor):
|
||||
def __init__(self, *args, **kwargs):
|
||||
deprecation_message = f"{self.__class__.__name__} is deprecated and will be removed in `0.20.0`. Please use `from diffusers.models.attention_processor import {''.join(self.__class__.__name__.split('Cross'))} instead."
|
||||
deprecate("cross_attention", "0.20.0", deprecation_message, standard_warn=False)
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class LoRAXFormersCrossAttnProcessor(LoRAXFormersAttnProcessor):
|
||||
def __init__(self, *args, **kwargs):
|
||||
deprecation_message = f"{self.__class__.__name__} is deprecated and will be removed in `0.20.0`. Please use `from diffusers.models.attention_processor import {''.join(self.__class__.__name__.split('Cross'))} instead."
|
||||
deprecate("cross_attention", "0.20.0", deprecation_message, standard_warn=False)
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class SlicedCrossAttnProcessor(SlicedAttnProcessor):
|
||||
def __init__(self, *args, **kwargs):
|
||||
deprecation_message = f"{self.__class__.__name__} is deprecated and will be removed in `0.20.0`. Please use `from diffusers.models.attention_processor import {''.join(self.__class__.__name__.split('Cross'))} instead."
|
||||
deprecate("cross_attention", "0.20.0", deprecation_message, standard_warn=False)
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class SlicedCrossAttnAddedKVProcessor(SlicedAttnAddedKVProcessor):
|
||||
def __init__(self, *args, **kwargs):
|
||||
deprecation_message = f"{self.__class__.__name__} is deprecated and will be removed in `0.20.0`. Please use `from diffusers.models.attention_processor import {''.join(self.__class__.__name__.split('Cross'))} instead."
|
||||
deprecate("cross_attention", "0.20.0", deprecation_message, standard_warn=False)
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -56,9 +56,9 @@ class UNet1DModel(ModelMixin, ConfigMixin):
|
||||
freq_shift (`float`, *optional*, defaults to 0.0): Frequency shift for Fourier time embedding.
|
||||
flip_sin_to_cos (`bool`, *optional*, defaults to `False`):
|
||||
Whether to flip sin to cos for Fourier time embedding.
|
||||
down_block_types (`Tuple[str]`, *optional*, defaults to `("DownBlock1DNoSkip", "DownBlock1D", "AttnDownBlock1D")`):
|
||||
down_block_types (`Tuple[str]`, *optional*, defaults to `("DownBlock1D", "DownBlock1DNoSkip", "AttnDownBlock1D")`):
|
||||
Tuple of downsample block types.
|
||||
up_block_types (`Tuple[str]`, *optional*, defaults to `("AttnUpBlock1D", "UpBlock1D", "UpBlock1DNoSkip")`):
|
||||
up_block_types (`Tuple[str]`, *optional*, defaults to `("UpBlock1D", "UpBlock1DNoSkip", "AttnUpBlock1D")`):
|
||||
Tuple of upsample block types.
|
||||
block_out_channels (`Tuple[int]`, *optional*, defaults to `(32, 32, 64)`):
|
||||
Tuple of block output channels.
|
||||
|
||||
@@ -585,7 +585,7 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
guidance_rescale (`float`, *optional*, defaults to 0.7):
|
||||
Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are
|
||||
Flawed](https://arxiv.org/pdf/2305.08891.pdf). Guidance rescale factor should fix overexposure when
|
||||
|
||||
@@ -634,7 +634,7 @@ class AltDiffusionImg2ImgPipeline(
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
|
||||
@@ -428,7 +428,7 @@ class AudioLDMPipeline(DiffusionPipeline):
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
output_type (`str`, *optional*, defaults to `"np"`):
|
||||
The output format of the generated image. Choose between `"np"` to return a NumPy `np.ndarray` or
|
||||
`"pt"` to return a PyTorch `torch.Tensor` object.
|
||||
|
||||
@@ -294,9 +294,9 @@ class AutoPipelineForText2Image(ConfigMixin):
|
||||
Examples:
|
||||
|
||||
```py
|
||||
>>> from diffusers import AutoPipelineForText2Image
|
||||
>>> from diffusers import AutoPipelineForTextToImage
|
||||
|
||||
>>> pipeline = AutoPipelineForText2Image.from_pretrained("runwayml/stable-diffusion-v1-5")
|
||||
>>> pipeline = AutoPipelineForTextToImage.from_pretrained("runwayml/stable-diffusion-v1-5")
|
||||
>>> print(pipeline.__class__)
|
||||
```
|
||||
"""
|
||||
@@ -328,13 +328,13 @@ class AutoPipelineForText2Image(ConfigMixin):
|
||||
an instantiated `DiffusionPipeline` object
|
||||
|
||||
```py
|
||||
>>> from diffusers import AutoPipelineForText2Image, AutoPipelineForImageToImage
|
||||
>>> from diffusers import AutoPipelineForTextToImage, AutoPipelineForImageToImage
|
||||
|
||||
>>> pipe_i2i = AutoPipelineForImage2Image.from_pretrained(
|
||||
... "runwayml/stable-diffusion-v1-5", requires_safety_checker=False
|
||||
... )
|
||||
|
||||
>>> pipe_t2i = AutoPipelineForText2Image.from_pipe(pipe_t2i)
|
||||
>>> pipe_t2i = AutoPipelineForTextToImage.from_pipe(pipe_t2i)
|
||||
```
|
||||
"""
|
||||
|
||||
@@ -537,7 +537,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
|
||||
Examples:
|
||||
|
||||
```py
|
||||
>>> from diffusers import AutoPipelineForText2Image
|
||||
>>> from diffusers import AutoPipelineForTextToImage
|
||||
|
||||
>>> pipeline = AutoPipelineForImageToImage.from_pretrained("runwayml/stable-diffusion-v1-5")
|
||||
>>> print(pipeline.__class__)
|
||||
@@ -573,7 +573,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
|
||||
Examples:
|
||||
|
||||
```py
|
||||
>>> from diffusers import AutoPipelineForText2Image, AutoPipelineForImageToImage
|
||||
>>> from diffusers import AutoPipelineForTextToImage, AutoPipelineForImageToImage
|
||||
|
||||
>>> pipe_t2i = AutoPipelineForText2Image.from_pretrained(
|
||||
... "runwayml/stable-diffusion-v1-5", requires_safety_checker=False
|
||||
@@ -781,7 +781,7 @@ class AutoPipelineForInpainting(ConfigMixin):
|
||||
Examples:
|
||||
|
||||
```py
|
||||
>>> from diffusers import AutoPipelineForText2Image
|
||||
>>> from diffusers import AutoPipelineForTextToImage
|
||||
|
||||
>>> pipeline = AutoPipelineForImageToImage.from_pretrained("runwayml/stable-diffusion-v1-5")
|
||||
>>> print(pipeline.__class__)
|
||||
@@ -817,7 +817,7 @@ class AutoPipelineForInpainting(ConfigMixin):
|
||||
Examples:
|
||||
|
||||
```py
|
||||
>>> from diffusers import AutoPipelineForText2Image, AutoPipelineForInpainting
|
||||
>>> from diffusers import AutoPipelineForTextToImage, AutoPipelineForInpainting
|
||||
|
||||
>>> pipe_t2i = AutoPipelineForText2Image.from_pretrained(
|
||||
... "DeepFloyd/IF-I-XL-v1.0", requires_safety_checker=False
|
||||
|
||||
@@ -760,7 +760,7 @@ class StableDiffusionControlNetPipeline(
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
|
||||
The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
|
||||
to the residual in the original unet. If multiple ControlNets are specified in init, you can set the
|
||||
|
||||
@@ -851,7 +851,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
|
||||
The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
|
||||
to the residual in the original unet. If multiple ControlNets are specified in init, you can set the
|
||||
|
||||
@@ -1048,7 +1048,7 @@ class StableDiffusionControlNetInpaintPipeline(
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 0.5):
|
||||
The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
|
||||
to the residual in the original unet. If multiple ControlNets are specified in init, you can set the
|
||||
|
||||
@@ -305,6 +305,7 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
|
||||
|
||||
text_input_ids = text_inputs.input_ids
|
||||
untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
|
||||
untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
|
||||
|
||||
if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
|
||||
text_input_ids, untruncated_ids
|
||||
@@ -753,7 +754,7 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
|
||||
The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
|
||||
to the residual in the original unet. If multiple ControlNets are specified in init, you can set the
|
||||
|
||||
@@ -662,7 +662,7 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
|
||||
@@ -783,7 +783,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
|
||||
@@ -865,7 +865,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
noise_level (`int`, *optional*, defaults to 250):
|
||||
The amount of noise to add to the upscaled image. Must be in the range `[0, 1000)`
|
||||
clean_caption (`bool`, *optional*, defaults to `True`):
|
||||
|
||||
@@ -883,7 +883,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
|
||||
@@ -961,7 +961,7 @@ class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
noise_level (`int`, *optional*, defaults to 0):
|
||||
The amount of noise to add to the upscaled image. Must be in the range `[0, 1000)`
|
||||
clean_caption (`bool`, *optional*, defaults to `True`):
|
||||
|
||||
@@ -730,7 +730,7 @@ class IFSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
noise_level (`int`, *optional*, defaults to 250):
|
||||
The amount of noise to add to the upscaled image. Must be in the range `[0, 1000)`
|
||||
clean_caption (`bool`, *optional*, defaults to `True`):
|
||||
|
||||
@@ -357,29 +357,10 @@ class FlaxDiffusionPipeline(ConfigMixin):
|
||||
# extract them here
|
||||
expected_modules, optional_kwargs = cls._get_signature_keys(pipeline_class)
|
||||
passed_class_obj = {k: kwargs.pop(k) for k in expected_modules if k in kwargs}
|
||||
passed_pipe_kwargs = {k: kwargs.pop(k) for k in optional_kwargs if k in kwargs}
|
||||
|
||||
init_dict, unused_kwargs, _ = pipeline_class.extract_init_dict(config_dict, **kwargs)
|
||||
init_dict, _, _ = pipeline_class.extract_init_dict(config_dict, **kwargs)
|
||||
|
||||
# define init kwargs
|
||||
init_kwargs = {k: init_dict.pop(k) for k in optional_kwargs if k in init_dict}
|
||||
init_kwargs = {**init_kwargs, **passed_pipe_kwargs}
|
||||
|
||||
# remove `null` components
|
||||
def load_module(name, value):
|
||||
if value[0] is None:
|
||||
return False
|
||||
if name in passed_class_obj and passed_class_obj[name] is None:
|
||||
return False
|
||||
return True
|
||||
|
||||
init_dict = {k: v for k, v in init_dict.items() if load_module(k, v)}
|
||||
|
||||
# Throw nice warnings / errors for fast accelerate loading
|
||||
if len(unused_kwargs) > 0:
|
||||
logger.warning(
|
||||
f"Keyword arguments {unused_kwargs} are not expected by {pipeline_class.__name__} and will be ignored."
|
||||
)
|
||||
init_kwargs = {}
|
||||
|
||||
# inference_params
|
||||
params = {}
|
||||
|
||||
@@ -1375,7 +1375,7 @@ class DiffusionPipeline(ConfigMixin):
|
||||
# if the whole pipeline is cached we don't have to ping the Hub
|
||||
if revision in DEPRECATED_REVISION_ARGS and version.parse(
|
||||
version.parse(__version__).base_version
|
||||
) >= version.parse("0.22.0"):
|
||||
) >= version.parse("0.20.0"):
|
||||
warn_deprecated_model_variant(
|
||||
pretrained_model_name, use_auth_token, variant, revision, model_filenames
|
||||
)
|
||||
|
||||
+2
-2
@@ -442,7 +442,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline):
|
||||
if do_classifier_free_guidance:
|
||||
uncond_tokens: List[str]
|
||||
if negative_prompt is None:
|
||||
uncond_tokens = [""] * batch_size
|
||||
uncond_tokens = [""]
|
||||
elif type(prompt) is not type(negative_prompt):
|
||||
raise TypeError(
|
||||
f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
|
||||
@@ -471,7 +471,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline):
|
||||
|
||||
# duplicate unconditional embeddings for each generation per prompt, using mps friendly method
|
||||
seq_len = uncond_embeddings.shape[1]
|
||||
uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1)
|
||||
uncond_embeddings = uncond_embeddings.repeat(batch_size, num_images_per_prompt, 1)
|
||||
uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1)
|
||||
|
||||
# For classifier free guidance, we need to do two forward passes.
|
||||
|
||||
@@ -1186,6 +1186,7 @@ def download_from_original_stable_diffusion_ckpt(
|
||||
StableDiffusionInpaintPipeline,
|
||||
StableDiffusionPipeline,
|
||||
StableDiffusionXLImg2ImgPipeline,
|
||||
StableDiffusionXLPipeline,
|
||||
StableUnCLIPImg2ImgPipeline,
|
||||
StableUnCLIPPipeline,
|
||||
)
|
||||
@@ -1542,7 +1543,7 @@ def download_from_original_stable_diffusion_ckpt(
|
||||
checkpoint, config_name, prefix="conditioner.embedders.1.model.", has_projection=True, **config_kwargs
|
||||
)
|
||||
|
||||
pipe = pipeline_class(
|
||||
pipe = StableDiffusionXLPipeline(
|
||||
vae=vae,
|
||||
text_encoder=text_encoder,
|
||||
tokenizer=tokenizer,
|
||||
|
||||
@@ -644,7 +644,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Example:
|
||||
|
||||
|
||||
@@ -585,7 +585,7 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
guidance_rescale (`float`, *optional*, defaults to 0.7):
|
||||
Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are
|
||||
Flawed](https://arxiv.org/pdf/2305.08891.pdf). Guidance rescale factor should fix overexposure when
|
||||
|
||||
+1
-1
@@ -750,7 +750,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, TextualInversion
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
max_iter_to_alter (`int`, *optional*, defaults to `25`):
|
||||
Number of denoising steps to apply attend-and-excite. The `max_iter_to_alter` denoising steps are when
|
||||
attend-and-excite is applied. For example, if `max_iter_to_alter` is `25` and there are a total of `30`
|
||||
|
||||
@@ -570,7 +570,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
|
||||
@@ -902,7 +902,7 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM
|
||||
The output format of the generated image. Choose between `PIL.Image` or `np.array`.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
@@ -1110,7 +1110,7 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
lambda_auto_corr (`float`, *optional*, defaults to 20.0):
|
||||
Lambda parameter to control auto correction.
|
||||
lambda_kl (`float`, *optional*, defaults to 20.0):
|
||||
@@ -1352,7 +1352,7 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
|
||||
@@ -640,7 +640,7 @@ class StableDiffusionImg2ImgPipeline(
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
|
||||
@@ -756,7 +756,7 @@ class StableDiffusionInpaintPipeline(
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
|
||||
+1
-1
@@ -606,7 +606,7 @@ class StableDiffusionInpaintPipelineLegacy(
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Returns:
|
||||
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
|
||||
|
||||
@@ -41,10 +41,11 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
EXAMPLE_DOC_STRING = """
|
||||
Examples:
|
||||
```python
|
||||
>>> from diffusers import StableDiffusionLDM3DPipeline
|
||||
```py
|
||||
>>> import torch
|
||||
>>> from diffusers import StableDiffusionPipeline
|
||||
|
||||
>>> pipe = StableDiffusionLDM3DPipeline.from_pretrained("Intel/ldm3d-4c")
|
||||
>>> pipe = StableDiffusionLDM3DPipeline.from_pretrained("Intel/ldm3d")
|
||||
>>> pipe = pipe.to("cuda")
|
||||
|
||||
>>> prompt = "a photo of an astronaut riding a horse on mars"
|
||||
@@ -62,10 +63,7 @@ class LDM3DPipelineOutput(BaseOutput):
|
||||
Output class for Stable Diffusion pipelines.
|
||||
|
||||
Args:
|
||||
rgb (`List[PIL.Image.Image]` or `np.ndarray`)
|
||||
List of denoised PIL images of length `batch_size` or NumPy array of shape `(batch_size, height, width,
|
||||
num_channels)`.
|
||||
depth (`List[PIL.Image.Image]` or `np.ndarray`)
|
||||
images (`List[PIL.Image.Image]` or `np.ndarray`)
|
||||
List of denoised PIL images of length `batch_size` or NumPy array of shape `(batch_size, height, width,
|
||||
num_channels)`.
|
||||
nsfw_content_detected (`List[bool]`)
|
||||
@@ -543,7 +541,7 @@ class StableDiffusionLDM3DPipeline(
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
|
||||
@@ -617,7 +617,7 @@ class StableDiffusionModelEditingPipeline(DiffusionPipeline, TextualInversionLoa
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
|
||||
@@ -517,7 +517,7 @@ class StableDiffusionPanoramaPipeline(DiffusionPipeline, TextualInversionLoaderM
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
circular_padding (`bool`, *optional*, defaults to `False`):
|
||||
If set to `True`, circular padding is applied to ensure there are no stitching artifacts. Circular
|
||||
padding allows the model to seamlessly generate a transition from the rightmost part of the image to
|
||||
|
||||
@@ -546,7 +546,7 @@ class StableDiffusionParadigmsPipeline(
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
debug (`bool`, *optional*, defaults to `False`):
|
||||
Whether or not to run in debug mode. In debug mode, `torch.cumsum` is evaluated using the CPU.
|
||||
|
||||
|
||||
@@ -503,7 +503,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, TextualInversionLoaderMixin)
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
|
||||
@@ -561,7 +561,7 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline, TextualInversionLoaderMi
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
```py
|
||||
|
||||
@@ -684,7 +684,7 @@ class StableUnCLIPPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
noise_level (`int`, *optional*, defaults to `0`):
|
||||
The amount of noise to add to the image embeddings. A higher `noise_level` increases the variance in
|
||||
the final un-noised images. See [`StableUnCLIPPipeline.noise_image_embeddings`] for more details.
|
||||
|
||||
@@ -654,7 +654,7 @@ class StableUnCLIPImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
noise_level (`int`, *optional*, defaults to `0`):
|
||||
The amount of noise to add to the image embeddings. A higher `noise_level` increases the variance in
|
||||
the final un-noised images. See [`StableUnCLIPPipeline.noise_image_embeddings`] for more details.
|
||||
|
||||
@@ -30,7 +30,7 @@ try:
|
||||
if not (is_transformers_available() and is_torch_available()):
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
from ...utils.dummy_torch_and_transformers_objects import * # noqa F403
|
||||
from ...utils.dummy_torch_and_transformers_and_objects import * # noqa F403
|
||||
else:
|
||||
from .pipeline_stable_diffusion_xl import StableDiffusionXLPipeline
|
||||
from .pipeline_stable_diffusion_xl_img2img import StableDiffusionXLImg2ImgPipeline
|
||||
|
||||
@@ -310,6 +310,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin, LoraLoad
|
||||
|
||||
text_input_ids = text_inputs.input_ids
|
||||
untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
|
||||
untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
|
||||
|
||||
if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
|
||||
text_input_ids, untruncated_ids
|
||||
@@ -662,7 +663,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin, LoraLoad
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
guidance_rescale (`float`, *optional*, defaults to 0.7):
|
||||
Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
|
||||
Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
|
||||
|
||||
@@ -317,6 +317,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin, L
|
||||
|
||||
text_input_ids = text_inputs.input_ids
|
||||
untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
|
||||
untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
|
||||
|
||||
if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
|
||||
text_input_ids, untruncated_ids
|
||||
@@ -782,7 +783,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin, L
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
guidance_rescale (`float`, *optional*, defaults to 0.7):
|
||||
Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
|
||||
Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
|
||||
|
||||
@@ -458,6 +458,7 @@ class StableDiffusionXLInpaintPipeline(
|
||||
|
||||
text_input_ids = text_inputs.input_ids
|
||||
untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
|
||||
untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
|
||||
|
||||
if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
|
||||
text_input_ids, untruncated_ids
|
||||
@@ -992,7 +993,7 @@ class StableDiffusionXLInpaintPipeline(
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
||||
If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
|
||||
`original_size` defaults to `(width, height)` if not specified. Part of SDXL's micro-conditioning as
|
||||
|
||||
+1
-1
@@ -688,7 +688,7 @@ class StableDiffusionXLInstructPix2PixPipeline(DiffusionPipeline, FromSingleFile
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
guidance_rescale (`float`, *optional*, defaults to 0.7):
|
||||
Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
|
||||
Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
|
||||
|
||||
@@ -625,7 +625,7 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline):
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the `AttnProcessor` as defined under
|
||||
`self.processor` in
|
||||
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
adapter_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
|
||||
The outputs of the adapter are multiplied by `adapter_conditioning_scale` before they are added to the
|
||||
residual in the original unet. If multiple adapters are specified in init, you can set the
|
||||
|
||||
@@ -516,7 +516,7 @@ class TextToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lora
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
|
||||
+1
-1
@@ -600,7 +600,7 @@ class VideoToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
|
||||
Examples:
|
||||
|
||||
|
||||
@@ -378,7 +378,7 @@ class TextToVideoZeroPipeline(StableDiffusionPipeline):
|
||||
Extra_step_kwargs.
|
||||
cross_attention_kwargs:
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
||||
num_warmup_steps:
|
||||
number of warmup steps.
|
||||
|
||||
|
||||
@@ -280,7 +280,7 @@ def _get_model_file(
|
||||
if (
|
||||
revision in DEPRECATED_REVISION_ARGS
|
||||
and (weights_name == WEIGHTS_NAME or weights_name == SAFETENSORS_WEIGHTS_NAME)
|
||||
and version.parse(version.parse(__version__).base_version) >= version.parse("0.22.0")
|
||||
and version.parse(version.parse(__version__).base_version) >= version.parse("0.20.0")
|
||||
):
|
||||
try:
|
||||
model_file = hf_hub_download(
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
|
||||
import inspect
|
||||
import unittest
|
||||
from importlib import import_module
|
||||
|
||||
|
||||
class DependencyTester(unittest.TestCase):
|
||||
@@ -38,13 +37,3 @@ class DependencyTester(unittest.TestCase):
|
||||
elif backend == "invisible_watermark":
|
||||
backend = "invisible-watermark"
|
||||
assert backend in deps, f"{backend} is not in the deps table!"
|
||||
|
||||
def test_pipeline_imports(self):
|
||||
import diffusers
|
||||
import diffusers.pipelines
|
||||
|
||||
all_classes = inspect.getmembers(diffusers, inspect.isclass)
|
||||
for cls_name, cls_module in all_classes:
|
||||
if hasattr(diffusers.pipelines, cls_name):
|
||||
pipeline_folder_module = ".".join(str(cls_module.__module__).split(".")[:3])
|
||||
_ = import_module(pipeline_folder_module, str(cls_name))
|
||||
|
||||
@@ -63,9 +63,9 @@ def _test_img2img_compile(in_queue, out_queue, timeout):
|
||||
|
||||
pipe = StableDiffusionImg2ImgPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", safety_checker=None)
|
||||
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
|
||||
pipe.unet.set_default_attn_processor()
|
||||
pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
pipe.unet.to(memory_format=torch.channels_last)
|
||||
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
|
||||
|
||||
@@ -73,7 +73,7 @@ def _test_img2img_compile(in_queue, out_queue, timeout):
|
||||
image_slice = image[0, -3:, -3:, -1].flatten()
|
||||
|
||||
assert image.shape == (1, 512, 768, 3)
|
||||
expected_slice = np.array([0.0606, 0.0570, 0.0805, 0.0579, 0.0628, 0.0623, 0.0843, 0.1115, 0.0806])
|
||||
expected_slice = np.array([0.0593, 0.0607, 0.0851, 0.0582, 0.0636, 0.0721, 0.0751, 0.0981, 0.0781])
|
||||
|
||||
assert np.abs(expected_slice - image_slice).max() < 1e-3
|
||||
except Exception:
|
||||
|
||||
@@ -64,7 +64,6 @@ def _test_inpaint_compile(in_queue, out_queue, timeout):
|
||||
pipe = StableDiffusionInpaintPipeline.from_pretrained(
|
||||
"runwayml/stable-diffusion-inpainting", safety_checker=None
|
||||
)
|
||||
pipe.unet.set_default_attn_processor()
|
||||
pipe.scheduler = PNDMScheduler.from_config(pipe.scheduler.config)
|
||||
pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -76,7 +75,8 @@ def _test_inpaint_compile(in_queue, out_queue, timeout):
|
||||
image_slice = image[0, 253:256, 253:256, -1].flatten()
|
||||
|
||||
assert image.shape == (1, 512, 512, 3)
|
||||
expected_slice = np.array([0.0689, 0.0699, 0.0790, 0.0536, 0.0470, 0.0488, 0.041, 0.0508, 0.04179])
|
||||
expected_slice = np.array([0.0425, 0.0273, 0.0344, 0.1694, 0.1727, 0.1812, 0.3256, 0.3311, 0.3272])
|
||||
|
||||
assert np.abs(expected_slice - image_slice).max() < 3e-3
|
||||
except Exception:
|
||||
error = f"{traceback.format_exc()}"
|
||||
@@ -382,7 +382,6 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
|
||||
pipe = StableDiffusionInpaintPipeline.from_pretrained(
|
||||
"runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16, safety_checker=None
|
||||
)
|
||||
pipe.unet.set_default_attn_processor()
|
||||
pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
pipe.enable_attention_slicing()
|
||||
@@ -392,7 +391,8 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
|
||||
image_slice = image[0, 253:256, 253:256, -1].flatten()
|
||||
|
||||
assert image.shape == (1, 512, 512, 3)
|
||||
expected_slice = np.array([0.1509, 0.1245, 0.1672, 0.1655, 0.1519, 0.1226, 0.1462, 0.1567, 0.2451])
|
||||
expected_slice = np.array([0.1350, 0.1123, 0.1350, 0.1641, 0.1328, 0.1230, 0.1289, 0.1531, 0.1687])
|
||||
|
||||
assert np.abs(expected_slice - image_slice).max() < 5e-2
|
||||
|
||||
def test_stable_diffusion_inpaint_pndm(self):
|
||||
@@ -485,7 +485,6 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
|
||||
"runwayml/stable-diffusion-inpainting", safety_checker=None
|
||||
)
|
||||
pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
|
||||
pipe.unet.set_default_attn_processor()
|
||||
pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
pipe.enable_attention_slicing()
|
||||
@@ -498,12 +497,11 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
|
||||
assert image.shape == (1, 512, 512, 3)
|
||||
|
||||
image_slice = image[0, 253:256, 253:256, -1].flatten()
|
||||
expected_slice = np.array([0.2728, 0.2803, 0.2665, 0.2511, 0.2774, 0.2586, 0.2391, 0.2392, 0.2582])
|
||||
assert np.abs(expected_slice - image_slice).max() < 1e-3
|
||||
expected_slice = np.array([0.0021, 0.2350, 0.3712, 0.0575, 0.2485, 0.3451, 0.1857, 0.3156, 0.3943])
|
||||
assert np.abs(expected_slice - image_slice).max() < 3e-3
|
||||
|
||||
def test_stable_diffusion_simple_inpaint_ddim(self):
|
||||
pipe = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None)
|
||||
pipe.unet.set_default_attn_processor()
|
||||
pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
pipe.enable_attention_slicing()
|
||||
@@ -514,8 +512,9 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
|
||||
image_slice = image[0, 253:256, 253:256, -1].flatten()
|
||||
|
||||
assert image.shape == (1, 512, 512, 3)
|
||||
expected_slice = np.array([0.3757, 0.3875, 0.4445, 0.4353, 0.3780, 0.4513, 0.3965, 0.3984, 0.4362])
|
||||
assert np.abs(expected_slice - image_slice).max() < 1e-3
|
||||
expected_slice = np.array([0.5157, 0.6858, 0.6873, 0.4619, 0.6416, 0.6898, 0.3702, 0.5960, 0.6935])
|
||||
|
||||
assert np.abs(expected_slice - image_slice).max() < 6e-4
|
||||
|
||||
def test_download_local(self):
|
||||
filename = hf_hub_download("runwayml/stable-diffusion-inpainting", filename="sd-v1-5-inpainting.ckpt")
|
||||
|
||||
@@ -299,7 +299,6 @@ class StableDiffusionPanoramaSlowTests(unittest.TestCase):
|
||||
"stabilityai/stable-diffusion-2-base", safety_checker=None
|
||||
)
|
||||
pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
|
||||
pipe.unet.set_default_attn_processor()
|
||||
pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
pipe.enable_attention_slicing()
|
||||
@@ -307,6 +306,7 @@ class StableDiffusionPanoramaSlowTests(unittest.TestCase):
|
||||
inputs = self.get_inputs()
|
||||
image = pipe(**inputs).images
|
||||
image_slice = image[0, -3:, -3:, -1].flatten()
|
||||
|
||||
assert image.shape == (1, 512, 2048, 3)
|
||||
|
||||
expected_slice = np.array(
|
||||
@@ -325,7 +325,7 @@ class StableDiffusionPanoramaSlowTests(unittest.TestCase):
|
||||
]
|
||||
)
|
||||
|
||||
assert np.abs(expected_slice - image_slice).max() < 1e-2
|
||||
assert np.abs(expected_slice - image_slice).max() < 1e-3
|
||||
|
||||
def test_stable_diffusion_panorama_intermediate_state(self):
|
||||
number_of_steps = 0
|
||||
|
||||
@@ -344,7 +344,6 @@ class StableDiffusion2PipelineSlowTests(unittest.TestCase):
|
||||
pipe = StableDiffusionPipeline.from_pretrained(
|
||||
"stabilityai/stable-diffusion-2-base", torch_dtype=torch.float16
|
||||
)
|
||||
pipe.unet.set_default_attn_processor()
|
||||
pipe = pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
@@ -360,7 +359,6 @@ class StableDiffusion2PipelineSlowTests(unittest.TestCase):
|
||||
|
||||
# disable slicing
|
||||
pipe.disable_attention_slicing()
|
||||
pipe.unet.set_default_attn_processor()
|
||||
inputs = self.get_inputs(torch_device, dtype=torch.float16)
|
||||
image = pipe(**inputs).images
|
||||
|
||||
|
||||
@@ -417,7 +417,6 @@ class StableDiffusionDepth2ImgPipelineSlowTests(unittest.TestCase):
|
||||
pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
|
||||
"stabilityai/stable-diffusion-2-depth", safety_checker=None
|
||||
)
|
||||
pipe.unet.set_default_attn_processor()
|
||||
pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
|
||||
pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
@@ -528,7 +528,6 @@ class PipelineTesterMixin:
|
||||
pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
components = self.get_dummy_components()
|
||||
pipe_fp16 = self.pipeline_class(**components)
|
||||
pipe_fp16.to(torch_device, torch.float16)
|
||||
pipe_fp16.set_progress_bar_config(disable=None)
|
||||
|
||||
Reference in New Issue
Block a user