Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| ea238e821b | |||
| b6d1d670fc | |||
| 4330a747d4 | |||
| 76de6a09fb | |||
| 25caf24ef9 | |||
| 8db3c9bc9f | |||
| e0e9f81971 | |||
| 5d848ec07c |
@@ -53,6 +53,8 @@ jobs:
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -U setuptools wheel twine
|
||||
pip install -U torch --index-url https://download.pytorch.org/whl/cpu
|
||||
pip install -U transformers
|
||||
|
||||
- name: Build the dist files
|
||||
run: python setup.py bdist_wheel && python setup.py sdist
|
||||
|
||||
@@ -77,7 +77,7 @@ Please refer to the [How to use Stable Diffusion in Apple Silicon](https://huggi
|
||||
|
||||
## Quickstart
|
||||
|
||||
Generating outputs is super easy with 🤗 Diffusers. To generate an image from text, use the `from_pretrained` method to load any pretrained diffusion model (browse the [Hub](https://huggingface.co/models?library=diffusers&sort=downloads) for 19000+ checkpoints):
|
||||
Generating outputs is super easy with 🤗 Diffusers. To generate an image from text, use the `from_pretrained` method to load any pretrained diffusion model (browse the [Hub](https://huggingface.co/models?library=diffusers&sort=downloads) for 22000+ checkpoints):
|
||||
|
||||
```python
|
||||
from diffusers import DiffusionPipeline
|
||||
@@ -219,7 +219,7 @@ Also, say 👋 in our public Discord channel <a href="https://discord.gg/G7tWnz9
|
||||
- https://github.com/deep-floyd/IF
|
||||
- https://github.com/bentoml/BentoML
|
||||
- https://github.com/bmaltais/kohya_ss
|
||||
- +8000 other amazing GitHub repositories 💪
|
||||
- +9000 other amazing GitHub repositories 💪
|
||||
|
||||
Thank you for using us ❤️.
|
||||
|
||||
|
||||
@@ -400,14 +400,14 @@
|
||||
title: DPMSolverSDEScheduler
|
||||
- local: api/schedulers/singlestep_dpm_solver
|
||||
title: DPMSolverSinglestepScheduler
|
||||
- local: api/schedulers/edm_multistep_dpm_solver
|
||||
title: EDMDPMSolverMultistepScheduler
|
||||
- local: api/schedulers/edm_euler
|
||||
title: EDMEulerScheduler
|
||||
- local: api/schedulers/euler_ancestral
|
||||
title: EulerAncestralDiscreteScheduler
|
||||
- local: api/schedulers/euler
|
||||
title: EulerDiscreteScheduler
|
||||
- local: api/schedulers/edm_euler
|
||||
title: EDMEulerScheduler
|
||||
- local: api/schedulers/edm_multistep_dpm_solver
|
||||
title: EDMDPMSolverMultistepScheduler
|
||||
- local: api/schedulers/heun
|
||||
title: HeunDiscreteScheduler
|
||||
- local: api/schedulers/ipndm
|
||||
|
||||
@@ -172,3 +172,41 @@ inpaint = StableDiffusionInpaintPipeline(**text2img.components)
|
||||
|
||||
# now you can use text2img(...), img2img(...), inpaint(...) just like the call methods of each respective pipeline
|
||||
```
|
||||
|
||||
### Create web demos using `gradio`
|
||||
|
||||
The Stable Diffusion pipelines are automatically supported in [Gradio](https://github.com/gradio-app/gradio/), a library that makes creating beautiful and user-friendly machine learning apps on the web a breeze. First, make sure you have Gradio installed:
|
||||
|
||||
```
|
||||
pip install -U gradio
|
||||
```
|
||||
|
||||
Then, create a web demo around any Stable Diffusion-based pipeline. For example, you can create an image generation pipeline in a single line of code with Gradio's [`Interface.from_pipeline`](https://www.gradio.app/docs/interface#interface-from-pipeline) function:
|
||||
|
||||
```py
|
||||
from diffusers import StableDiffusionPipeline
|
||||
import gradio as gr
|
||||
|
||||
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
|
||||
|
||||
gr.Interface.from_pipeline(pipe).launch()
|
||||
```
|
||||
|
||||
which opens an intuitive drag-and-drop interface in your browser:
|
||||
|
||||

|
||||
|
||||
Similarly, you could create a demo for an image-to-image pipeline with:
|
||||
|
||||
```py
|
||||
from diffusers import StableDiffusionImg2ImgPipeline
|
||||
import gradio as gr
|
||||
|
||||
|
||||
pipe = StableDiffusionImg2ImgPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
|
||||
|
||||
gr.Interface.from_pipeline(pipe).launch()
|
||||
```
|
||||
|
||||
By default, the web demo runs on a local server. If you'd like to share it with others, you can generate a temporary public
|
||||
link by setting `share=True` in `launch()`. Or, you can host your demo on [Hugging Face Spaces](https://huggingface.co/spaces) for a permanent link.
|
||||
@@ -70,7 +70,7 @@ from diffusers.utils.import_utils import is_xformers_available
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ from diffusers.utils.torch_utils import is_compiled_module
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -40,7 +40,8 @@ from diffusers.utils import BaseOutput, check_min_version
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
|
||||
class MarigoldDepthOutput(BaseOutput):
|
||||
"""
|
||||
|
||||
@@ -72,7 +72,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -71,7 +71,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -60,7 +60,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -60,7 +60,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ from diffusers.utils.import_utils import is_xformers_available
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ from diffusers.utils import check_min_version
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
# Cache compiled models across invocations of this script.
|
||||
cc.initialize_cache(os.path.expanduser("~/.cache/jax/compilation_cache"))
|
||||
|
||||
@@ -70,7 +70,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -75,7 +75,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ from diffusers.utils.torch_utils import is_compiled_module
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
|
||||
@@ -59,7 +59,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ if is_wandb_available():
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ from diffusers.utils import check_min_version, is_wandb_available
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ from diffusers.utils import check_min_version, is_wandb_available
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ if is_wandb_available():
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
|
||||
+1
-1
@@ -637,7 +637,7 @@ def main(args):
|
||||
generator=generator,
|
||||
batch_size=args.eval_batch_size,
|
||||
num_inference_steps=args.ddpm_num_inference_steps,
|
||||
output_type="numpy",
|
||||
output_type="np",
|
||||
).images
|
||||
|
||||
if args.use_ema:
|
||||
|
||||
@@ -60,7 +60,7 @@ if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -56,7 +56,7 @@ if is_wandb_available():
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ from diffusers.utils import check_min_version
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ from diffusers.utils.torch_utils import is_compiled_module
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
|
||||
@@ -64,7 +64,7 @@ from diffusers.utils.torch_utils import is_compiled_module
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ from diffusers.utils.torch_utils import is_compiled_module
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -80,7 +80,7 @@ else:
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -56,7 +56,7 @@ else:
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -76,7 +76,7 @@ else:
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ from diffusers.utils.import_utils import is_xformers_available
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
@@ -648,7 +648,7 @@ def main(args):
|
||||
generator=generator,
|
||||
batch_size=args.eval_batch_size,
|
||||
num_inference_steps=args.ddpm_num_inference_steps,
|
||||
output_type="numpy",
|
||||
output_type="np",
|
||||
).images
|
||||
|
||||
if args.use_ema:
|
||||
|
||||
@@ -50,7 +50,7 @@ if is_wandb_available():
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ if is_wandb_available():
|
||||
|
||||
|
||||
# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
|
||||
check_min_version("0.27.0")
|
||||
check_min_version("0.28.0.dev0")
|
||||
|
||||
logger = get_logger(__name__, log_level="INFO")
|
||||
|
||||
|
||||
@@ -249,7 +249,7 @@ version_range_max = max(sys.version_info[1], 10) + 1
|
||||
|
||||
setup(
|
||||
name="diffusers",
|
||||
version="0.27.2", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
version="0.28.0.dev0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
description="State-of-the-art diffusion in PyTorch and JAX.",
|
||||
long_description=open("README.md", "r", encoding="utf-8").read(),
|
||||
long_description_content_type="text/markdown",
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__version__ = "0.27.2"
|
||||
__version__ = "0.28.0.dev0"
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
|
||||
@@ -293,7 +293,7 @@ class BasicTransformerBlock(nn.Module):
|
||||
) -> torch.FloatTensor:
|
||||
if cross_attention_kwargs is not None:
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
# Notice that normalization is always applied before the real computation in the following blocks.
|
||||
# 0. Self-Attention
|
||||
|
||||
@@ -767,7 +767,18 @@ class AttnProcessor:
|
||||
query = attn.to_q(hidden_states)
|
||||
|
||||
if encoder_hidden_states is None:
|
||||
encoder_hidden_states = hidden_states
|
||||
# encoder_hidden_states = hidden_states
|
||||
batch, seq, dim = hidden_states.shape
|
||||
height = width = seq**0.5
|
||||
# reshape to (batch, height, width, dim)
|
||||
encoder_hidden_states = hidden_states.view(batch, height, width, dim)
|
||||
# reshape to (batch, dim, height, width)
|
||||
encoder_hidden_states = encoder_hidden_states.permute(0, 3, 1, 2)
|
||||
encoder_hidden_states = torch.nn.functional.avg_pool2d(hidden_states, kernel_size=4)
|
||||
# reshape to (batch, dim, seq)
|
||||
encoder_hidden_states = encoder_hidden_states.view(batch, dim, -1)
|
||||
# reshape to (batch, seq, dim)
|
||||
encoder_hidden_states = encoder_hidden_states.permute(0, 2, 1)
|
||||
elif attn.norm_cross:
|
||||
encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)
|
||||
|
||||
@@ -1259,7 +1270,18 @@ class AttnProcessor2_0:
|
||||
query = attn.to_q(hidden_states)
|
||||
|
||||
if encoder_hidden_states is None:
|
||||
encoder_hidden_states = hidden_states
|
||||
# encoder_hidden_states = hidden_states
|
||||
batch, seq, dim = hidden_states.shape
|
||||
height = width = seq**0.5
|
||||
# reshape to (batch, height, width, dim)
|
||||
encoder_hidden_states = hidden_states.view(batch, height, width, dim)
|
||||
# reshape to (batch, dim, height, width)
|
||||
encoder_hidden_states = encoder_hidden_states.permute(0, 3, 1, 2)
|
||||
encoder_hidden_states = torch.nn.functional.avg_pool2d(hidden_states, kernel_size=4)
|
||||
# reshape to (batch, dim, seq)
|
||||
encoder_hidden_states = encoder_hidden_states.view(batch, dim, -1)
|
||||
# reshape to (batch, seq, dim)
|
||||
encoder_hidden_states = encoder_hidden_states.permute(0, 2, 1)
|
||||
elif attn.norm_cross:
|
||||
encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)
|
||||
|
||||
|
||||
@@ -308,7 +308,7 @@ class Transformer2DModel(ModelMixin, ConfigMixin):
|
||||
"""
|
||||
if cross_attention_kwargs is not None:
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
# ensure attention_mask is a bias, and give it a singleton query_tokens dimension.
|
||||
# we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward.
|
||||
# we can tell by counting dims; if ndim == 2: it's a mask rather than a bias.
|
||||
|
||||
@@ -846,7 +846,7 @@ class UNetMidBlock2DCrossAttn(nn.Module):
|
||||
) -> torch.FloatTensor:
|
||||
if cross_attention_kwargs is not None:
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
hidden_states = self.resnets[0](hidden_states, temb)
|
||||
for attn, resnet in zip(self.attentions, self.resnets[1:]):
|
||||
@@ -986,7 +986,7 @@ class UNetMidBlock2DSimpleCrossAttn(nn.Module):
|
||||
) -> torch.FloatTensor:
|
||||
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
if attention_mask is None:
|
||||
# if encoder_hidden_states is defined: we are doing cross-attn, so we should use cross-attn mask.
|
||||
@@ -1116,7 +1116,7 @@ class AttnDownBlock2D(nn.Module):
|
||||
) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]:
|
||||
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
output_states = ()
|
||||
|
||||
@@ -1241,7 +1241,7 @@ class CrossAttnDownBlock2D(nn.Module):
|
||||
) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]:
|
||||
if cross_attention_kwargs is not None:
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
output_states = ()
|
||||
|
||||
@@ -1986,7 +1986,7 @@ class SimpleCrossAttnDownBlock2D(nn.Module):
|
||||
) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]:
|
||||
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
output_states = ()
|
||||
|
||||
@@ -2201,7 +2201,7 @@ class KCrossAttnDownBlock2D(nn.Module):
|
||||
) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]:
|
||||
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
output_states = ()
|
||||
|
||||
@@ -2483,7 +2483,7 @@ class CrossAttnUpBlock2D(nn.Module):
|
||||
) -> torch.FloatTensor:
|
||||
if cross_attention_kwargs is not None:
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
is_freeu_enabled = (
|
||||
getattr(self, "s1", None)
|
||||
@@ -3312,7 +3312,7 @@ class SimpleCrossAttnUpBlock2D(nn.Module):
|
||||
) -> torch.FloatTensor:
|
||||
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
if attention_mask is None:
|
||||
# if encoder_hidden_states is defined: we are doing cross-attn, so we should use cross-attn mask.
|
||||
@@ -3694,7 +3694,7 @@ class KAttentionBlock(nn.Module):
|
||||
) -> torch.FloatTensor:
|
||||
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
# 1. Self-Attention
|
||||
if self.add_self_attention:
|
||||
|
||||
@@ -1081,8 +1081,6 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin,
|
||||
A tuple of tensors that if specified are added to the residuals of down unet blocks.
|
||||
mid_block_additional_residual: (`torch.Tensor`, *optional*):
|
||||
A tensor that if specified is added to the residual of the middle unet block.
|
||||
down_intrablock_additional_residuals (`tuple` of `torch.Tensor`, *optional*):
|
||||
additional residuals to be added within UNet down blocks, for example from T2I-Adapter side model(s)
|
||||
encoder_attention_mask (`torch.Tensor`):
|
||||
A cross-attention mask of shape `(batch, sequence_length)` is applied to `encoder_hidden_states`. If
|
||||
`True` the mask is kept, otherwise if `False` it is discarded. Mask will be converted into a bias,
|
||||
@@ -1090,6 +1088,18 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin,
|
||||
return_dict (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to return a [`~models.unets.unet_2d_condition.UNet2DConditionOutput`] instead of a plain
|
||||
tuple.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttnProcessor`].
|
||||
added_cond_kwargs: (`dict`, *optional*):
|
||||
A kwargs dictionary containing additional embeddings that if specified are added to the embeddings that
|
||||
are passed along to the UNet blocks.
|
||||
down_block_additional_residuals (`tuple` of `torch.Tensor`, *optional*):
|
||||
additional residuals to be added to UNet long skip connections from down blocks to up blocks for
|
||||
example from ControlNet side model(s)
|
||||
mid_block_additional_residual (`torch.Tensor`, *optional*):
|
||||
additional residual to be added to UNet mid block output, for example from ControlNet side model
|
||||
down_intrablock_additional_residuals (`tuple` of `torch.Tensor`, *optional*):
|
||||
additional residuals to be added within UNet down blocks, for example from T2I-Adapter side model(s)
|
||||
|
||||
Returns:
|
||||
[`~models.unets.unet_2d_condition.UNet2DConditionOutput`] or `tuple`:
|
||||
@@ -1175,14 +1185,7 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin,
|
||||
cross_attention_kwargs["gligen"] = {"objs": self.position_net(**gligen_args)}
|
||||
|
||||
# 3. down
|
||||
# we're popping the `scale` instead of getting it because otherwise `scale` will be propagated
|
||||
# to the internal blocks and will raise deprecation warnings. this will be confusing for our users.
|
||||
if cross_attention_kwargs is not None:
|
||||
cross_attention_kwargs = cross_attention_kwargs.copy()
|
||||
lora_scale = cross_attention_kwargs.pop("scale", 1.0)
|
||||
else:
|
||||
lora_scale = 1.0
|
||||
|
||||
lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0
|
||||
if USE_PEFT_BACKEND:
|
||||
# weight the lora layers by setting `lora_scale` for each PEFT layer
|
||||
scale_lora_layers(self, lora_scale)
|
||||
|
||||
@@ -1183,7 +1183,7 @@ class CrossAttnDownBlockMotion(nn.Module):
|
||||
):
|
||||
if cross_attention_kwargs is not None:
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
output_states = ()
|
||||
|
||||
@@ -1367,7 +1367,7 @@ class CrossAttnUpBlockMotion(nn.Module):
|
||||
) -> torch.FloatTensor:
|
||||
if cross_attention_kwargs is not None:
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
is_freeu_enabled = (
|
||||
getattr(self, "s1", None)
|
||||
@@ -1707,7 +1707,7 @@ class UNetMidBlockCrossAttnMotion(nn.Module):
|
||||
) -> torch.FloatTensor:
|
||||
if cross_attention_kwargs is not None:
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
hidden_states = self.resnets[0](hidden_states, temb)
|
||||
|
||||
|
||||
@@ -127,7 +127,7 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
|
||||
on the amount of noise initially added. When `strength` is 1, added noise is maximum and the denoising
|
||||
process runs for the full number of iterations specified in `num_inference_steps`. A value of 1
|
||||
essentially ignores `image`.
|
||||
num_inference_steps (`int`, *optional*, defaults to 16):
|
||||
num_inference_steps (`int`, *optional*, defaults to 12):
|
||||
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
||||
expense of slower inference.
|
||||
guidance_scale (`float`, *optional*, defaults to 10.0):
|
||||
@@ -191,7 +191,7 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
|
||||
negative_prompt_embeds is None and negative_encoder_hidden_states is not None
|
||||
):
|
||||
raise ValueError(
|
||||
"pass either both `negatve_prompt_embeds` and `negative_encoder_hidden_states` or neither"
|
||||
"pass either both `negative_prompt_embeds` and `negative_encoder_hidden_states` or neither"
|
||||
)
|
||||
|
||||
if (prompt is None and prompt_embeds is None) or (prompt is not None and prompt_embeds is not None):
|
||||
|
||||
@@ -824,20 +824,22 @@ class StableDiffusionControlNetPipeline(
|
||||
return latents
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
def get_guidance_scale_embedding(
|
||||
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
w (`torch.Tensor`):
|
||||
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
Dimension of the embeddings to generate.
|
||||
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
||||
Data type of the generated embeddings.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
@@ -869,20 +869,22 @@ class StableDiffusionXLControlNetPipeline(
|
||||
self.vae.decoder.mid_block.to(dtype)
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
def get_guidance_scale_embedding(
|
||||
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
w (`torch.Tensor`):
|
||||
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
Dimension of the embeddings to generate.
|
||||
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
||||
Data type of the generated embeddings.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
+5
-5
@@ -133,7 +133,7 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline):
|
||||
generator: Optional[torch.Generator] = None,
|
||||
num_inference_steps: int = 100,
|
||||
return_dict: bool = True,
|
||||
output_type: str = "numpy",
|
||||
output_type: str = "np",
|
||||
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
||||
callback_steps: int = 1,
|
||||
) -> Union[AudioPipelineOutput, Tuple]:
|
||||
@@ -157,7 +157,7 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline):
|
||||
expense of slower inference.
|
||||
return_dict (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to return a [`~pipelines.AudioPipelineOutput`] instead of a plain tuple.
|
||||
output_type (`str`, *optional*, defaults to `"numpy"`):
|
||||
output_type (`str`, *optional*, defaults to `"np"`):
|
||||
The output format of the generated audio.
|
||||
callback (`Callable`, *optional*):
|
||||
A function that calls every `callback_steps` steps during inference. The function is called with the
|
||||
@@ -249,16 +249,16 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline):
|
||||
|
||||
logger.info("Generated segment", i)
|
||||
|
||||
if output_type == "numpy" and not is_onnx_available():
|
||||
if output_type == "np" and not is_onnx_available():
|
||||
raise ValueError(
|
||||
"Cannot return output in 'np' format if ONNX is not available. Make sure to have ONNX installed or set 'output_type' to 'mel'."
|
||||
)
|
||||
elif output_type == "numpy" and self.melgan is None:
|
||||
elif output_type == "np" and self.melgan is None:
|
||||
raise ValueError(
|
||||
"Cannot return output in 'np' format if melgan component is not defined. Make sure to define `self.melgan` or set 'output_type' to 'mel'."
|
||||
)
|
||||
|
||||
if output_type == "numpy":
|
||||
if output_type == "np":
|
||||
output = self.melgan(input_features=full_pred_mel.astype(np.float32))
|
||||
else:
|
||||
output = full_pred_mel
|
||||
|
||||
+3
@@ -528,12 +528,15 @@ class StableDiffusionInpaintPipelineLegacy(
|
||||
f" {negative_prompt_embeds.shape}."
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
|
||||
def get_timesteps(self, num_inference_steps, strength, device):
|
||||
# get the original timestep using init_timestep
|
||||
init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
|
||||
|
||||
t_start = max(num_inference_steps - init_timestep, 0)
|
||||
timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
|
||||
if hasattr(self.scheduler, "set_begin_index"):
|
||||
self.scheduler.set_begin_index(t_start * self.scheduler.order)
|
||||
|
||||
return timesteps, num_inference_steps - t_start
|
||||
|
||||
|
||||
@@ -2004,7 +2004,7 @@ class CrossAttnUpBlockFlat(nn.Module):
|
||||
) -> torch.FloatTensor:
|
||||
if cross_attention_kwargs is not None:
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
is_freeu_enabled = (
|
||||
getattr(self, "s1", None)
|
||||
@@ -2338,7 +2338,7 @@ class UNetMidBlockFlatCrossAttn(nn.Module):
|
||||
) -> torch.FloatTensor:
|
||||
if cross_attention_kwargs is not None:
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
hidden_states = self.resnets[0](hidden_states, temb)
|
||||
for attn, resnet in zip(self.attentions, self.resnets[1:]):
|
||||
@@ -2479,7 +2479,7 @@ class UNetMidBlockFlatSimpleCrossAttn(nn.Module):
|
||||
) -> torch.FloatTensor:
|
||||
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
|
||||
if cross_attention_kwargs.get("scale", None) is not None:
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
|
||||
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
|
||||
|
||||
if attention_mask is None:
|
||||
# if encoder_hidden_states is defined: we are doing cross-attn, so we should use cross-attn mask.
|
||||
|
||||
+9
-7
@@ -548,20 +548,22 @@ class LatentConsistencyModelImg2ImgPipeline(
|
||||
return latents
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
def get_guidance_scale_embedding(
|
||||
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
w (`torch.Tensor`):
|
||||
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
Dimension of the embeddings to generate.
|
||||
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
||||
Data type of the generated embeddings.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
+9
-7
@@ -490,20 +490,22 @@ class LatentConsistencyModelPipeline(
|
||||
latents = latents * self.scheduler.init_noise_sigma
|
||||
return latents
|
||||
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
def get_guidance_scale_embedding(
|
||||
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
w (`torch.Tensor`):
|
||||
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
Dimension of the embeddings to generate.
|
||||
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
||||
Data type of the generated embeddings.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
@@ -713,20 +713,22 @@ class LEditsPPPipelineStableDiffusionXL(
|
||||
self.vae.decoder.mid_block.to(dtype)
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
def get_guidance_scale_embedding(
|
||||
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
w (`torch.Tensor`):
|
||||
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
Dimension of the embeddings to generate.
|
||||
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
||||
Data type of the generated embeddings.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
@@ -100,10 +100,8 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
|
||||
)
|
||||
self.register_to_config(latent_dim_scale=latent_dim_scale)
|
||||
|
||||
def prepare_latents(
|
||||
self, batch_size, image_embeddings, num_images_per_prompt, dtype, device, generator, latents, scheduler
|
||||
):
|
||||
_, channels, height, width = image_embeddings.shape
|
||||
def prepare_latents(self, image_embeddings, num_images_per_prompt, dtype, device, generator, latents, scheduler):
|
||||
batch_size, channels, height, width = image_embeddings.shape
|
||||
latents_shape = (
|
||||
batch_size * num_images_per_prompt,
|
||||
4,
|
||||
@@ -385,19 +383,7 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
|
||||
)
|
||||
if isinstance(image_embeddings, list):
|
||||
image_embeddings = torch.cat(image_embeddings, dim=0)
|
||||
|
||||
if prompt is not None and isinstance(prompt, str):
|
||||
batch_size = 1
|
||||
elif prompt is not None and isinstance(prompt, list):
|
||||
batch_size = len(prompt)
|
||||
else:
|
||||
batch_size = prompt_embeds.shape[0]
|
||||
|
||||
# Compute the effective number of images per prompt
|
||||
# We must account for the fact that the image embeddings from the prior can be generated with num_images_per_prompt > 1
|
||||
# This results in a case where a single prompt is associated with multiple image embeddings
|
||||
# Divide the number of image embeddings by the batch size to determine if this is the case.
|
||||
num_images_per_prompt = num_images_per_prompt * (image_embeddings.shape[0] // batch_size)
|
||||
batch_size = image_embeddings.shape[0]
|
||||
|
||||
# 2. Encode caption
|
||||
if prompt_embeds is None and negative_prompt_embeds is None:
|
||||
@@ -431,7 +417,7 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
|
||||
|
||||
# 5. Prepare latents
|
||||
latents = self.prepare_latents(
|
||||
batch_size, image_embeddings, num_images_per_prompt, dtype, device, generator, latents, self.scheduler
|
||||
image_embeddings, num_images_per_prompt, dtype, device, generator, latents, self.scheduler
|
||||
)
|
||||
|
||||
# 6. Run denoising loop
|
||||
|
||||
@@ -669,20 +669,22 @@ class StableDiffusionPipeline(
|
||||
return latents
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
def get_guidance_scale_embedding(
|
||||
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
w (`torch.Tensor`):
|
||||
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
Dimension of the embeddings to generate.
|
||||
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
||||
Data type of the generated embeddings.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
@@ -767,20 +767,22 @@ class StableDiffusionImg2ImgPipeline(
|
||||
return latents
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
def get_guidance_scale_embedding(
|
||||
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
w (`torch.Tensor`):
|
||||
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
Dimension of the embeddings to generate.
|
||||
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
||||
Data type of the generated embeddings.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
@@ -909,20 +909,22 @@ class StableDiffusionInpaintPipeline(
|
||||
return timesteps, num_inference_steps - t_start
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
def get_guidance_scale_embedding(
|
||||
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
w (`torch.Tensor`):
|
||||
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
Dimension of the embeddings to generate.
|
||||
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
||||
Data type of the generated embeddings.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
+5
-2
@@ -716,12 +716,15 @@ class StableDiffusionDiffEditPipeline(
|
||||
f" `source_negative_prompt_embeds` {source_negative_prompt_embeds.shape}."
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
|
||||
def get_timesteps(self, num_inference_steps, strength, device):
|
||||
# get the original timestep using init_timestep
|
||||
init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
|
||||
|
||||
t_start = max(num_inference_steps - init_timestep, 0)
|
||||
timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
|
||||
if hasattr(self.scheduler, "set_begin_index"):
|
||||
self.scheduler.set_begin_index(t_start * self.scheduler.order)
|
||||
|
||||
return timesteps, num_inference_steps - t_start
|
||||
|
||||
@@ -1301,7 +1304,7 @@ class StableDiffusionDiffEditPipeline(
|
||||
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
||||
callback_steps: int = 1,
|
||||
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
||||
clip_ckip: int = None,
|
||||
clip_skip: int = None,
|
||||
):
|
||||
r"""
|
||||
The call function to the pipeline for generation.
|
||||
@@ -1423,7 +1426,7 @@ class StableDiffusionDiffEditPipeline(
|
||||
prompt_embeds=prompt_embeds,
|
||||
negative_prompt_embeds=negative_prompt_embeds,
|
||||
lora_scale=text_encoder_lora_scale,
|
||||
clip_skip=clip_ckip,
|
||||
clip_skip=clip_skip,
|
||||
)
|
||||
# For classifier free guidance, we need to do two forward passes.
|
||||
# Here we concatenate the unconditional and text embeddings into a single batch
|
||||
|
||||
@@ -644,20 +644,22 @@ class StableDiffusionLDM3DPipeline(
|
||||
return latents
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
def get_guidance_scale_embedding(
|
||||
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
w (`torch.Tensor`):
|
||||
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
Dimension of the embeddings to generate.
|
||||
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
||||
Data type of the generated embeddings.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
@@ -632,7 +632,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
|
||||
# corresponds to doing no classifier free guidance.
|
||||
do_classifier_free_guidance = guidance_scale > 1.0
|
||||
# and `sag_scale` is` `s` of equation (16)
|
||||
# of the self-attentnion guidance paper: https://arxiv.org/pdf/2210.00939.pdf
|
||||
# of the self-attention guidance paper: https://arxiv.org/pdf/2210.00939.pdf
|
||||
# `sag_scale = 0` means no self-attention guidance
|
||||
do_self_attention_guidance = sag_scale > 0.0
|
||||
|
||||
@@ -667,7 +667,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
|
||||
|
||||
if timesteps.dtype not in [torch.int16, torch.int32, torch.int64]:
|
||||
raise ValueError(
|
||||
f"{self.__class__.__name__} does not support using a scheduler of type {self.scheduler.__class__.__name__}. Please make sure to use one of 'DDIMScheduler, PNDMScheduler, DDPMScheduler, DEISMultistepScheduler, UniPCMultistepScheduler, DPMSolverMultistepScheduler, DPMSolverSinlgestepScheduler'."
|
||||
f"{self.__class__.__name__} does not support using a scheduler of type {self.scheduler.__class__.__name__}. Please make sure to use one of 'DDIMScheduler, PNDMScheduler, DDPMScheduler, DEISMultistepScheduler, UniPCMultistepScheduler, DPMSolverMultistepScheduler, DPMSolverSinglestepScheduler'."
|
||||
)
|
||||
|
||||
# 5. Prepare latent variables
|
||||
@@ -723,7 +723,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
|
||||
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
||||
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
|
||||
|
||||
# perform self-attention guidance with the stored self-attentnion map
|
||||
# perform self-attention guidance with the stored self-attention map
|
||||
if do_self_attention_guidance:
|
||||
# classifier-free guidance produces two chunks of attention map
|
||||
# and we only use unconditional one according to equation (25)
|
||||
|
||||
@@ -740,20 +740,22 @@ class StableDiffusionXLPipeline(
|
||||
self.vae.decoder.mid_block.to(dtype)
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
def get_guidance_scale_embedding(
|
||||
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
w (`torch.Tensor`):
|
||||
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
Dimension of the embeddings to generate.
|
||||
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
||||
Data type of the generated embeddings.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
@@ -874,20 +874,22 @@ class StableDiffusionXLImg2ImgPipeline(
|
||||
self.vae.decoder.mid_block.to(dtype)
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
def get_guidance_scale_embedding(
|
||||
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
w (`torch.Tensor`):
|
||||
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
Dimension of the embeddings to generate.
|
||||
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
||||
Data type of the generated embeddings.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
@@ -1110,20 +1110,22 @@ class StableDiffusionXLInpaintPipeline(
|
||||
self.vae.decoder.mid_block.to(dtype)
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
def get_guidance_scale_embedding(
|
||||
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
w (`torch.Tensor`):
|
||||
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
Dimension of the embeddings to generate.
|
||||
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
||||
Data type of the generated embeddings.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
@@ -613,20 +613,22 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin):
|
||||
return height, width
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
def get_guidance_scale_embedding(
|
||||
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
w (`torch.Tensor`):
|
||||
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
Dimension of the embeddings to generate.
|
||||
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
||||
Data type of the generated embeddings.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
@@ -784,20 +784,22 @@ class StableDiffusionXLAdapterPipeline(
|
||||
return height, width
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
def get_guidance_scale_embedding(
|
||||
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
w (`torch.Tensor`):
|
||||
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
Dimension of the embeddings to generate.
|
||||
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
||||
Data type of the generated embeddings.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
@@ -575,8 +575,8 @@ class TextToVideoZeroPipeline(DiffusionPipeline, StableDiffusionMixin, TextualIn
|
||||
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for video
|
||||
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
||||
tensor is generated by sampling using the supplied random `generator`.
|
||||
output_type (`str`, *optional*, defaults to `"numpy"`):
|
||||
The output format of the generated video. Choose between `"latent"` and `"numpy"`.
|
||||
output_type (`str`, *optional*, defaults to `"np"`):
|
||||
The output format of the generated video. Choose between `"latent"` and `"np"`.
|
||||
return_dict (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to return a
|
||||
[`~pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput`] instead of
|
||||
|
||||
@@ -434,11 +434,7 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
|
||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||
if self.begin_index is None:
|
||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||
elif self.step_index is not None:
|
||||
# add_noise is called after first denoising step (for inpainting)
|
||||
step_indices = [self.step_index] * timesteps.shape[0]
|
||||
else:
|
||||
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
|
||||
@@ -768,14 +768,10 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
schedule_timesteps = self.timesteps.to(original_samples.device)
|
||||
timesteps = timesteps.to(original_samples.device)
|
||||
|
||||
# begin_index is None when the scheduler is used for training or pipeline does not implement set_begin_index
|
||||
# begin_index is None when the scheduler is used for training
|
||||
if self.begin_index is None:
|
||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||
elif self.step_index is not None:
|
||||
# add_noise is called after first denoising step (for inpainting)
|
||||
step_indices = [self.step_index] * timesteps.shape[0]
|
||||
else:
|
||||
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
|
||||
@@ -1011,14 +1011,10 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
schedule_timesteps = self.timesteps.to(original_samples.device)
|
||||
timesteps = timesteps.to(original_samples.device)
|
||||
|
||||
# begin_index is None when the scheduler is used for training or pipeline does not implement set_begin_index
|
||||
# begin_index is None when the scheduler is used for training
|
||||
if self.begin_index is None:
|
||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||
elif self.step_index is not None:
|
||||
# add_noise is called after first denoising step (for inpainting)
|
||||
step_indices = [self.step_index] * timesteps.shape[0]
|
||||
else:
|
||||
# add_noise is called before the first denoising step to create the initial latent (img2img)
|
||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
|
||||
@@ -543,11 +543,7 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
|
||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||
if self.begin_index is None:
|
||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||
elif self.step_index is not None:
|
||||
# add_noise is called after first denoising step (for inpainting)
|
||||
step_indices = [self.step_index] * timesteps.shape[0]
|
||||
else:
|
||||
# add_noise is called before the first denoising step to create the initial latent (img2img)
|
||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
|
||||
@@ -961,14 +961,10 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
||||
schedule_timesteps = self.timesteps.to(original_samples.device)
|
||||
timesteps = timesteps.to(original_samples.device)
|
||||
|
||||
# begin_index is None when the scheduler is used for training or pipeline does not implement set_begin_index
|
||||
# begin_index is None when the scheduler is used for training
|
||||
if self.begin_index is None:
|
||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||
elif self.step_index is not None:
|
||||
# add_noise is called after first denoising step (for inpainting)
|
||||
step_indices = [self.step_index] * timesteps.shape[0]
|
||||
else:
|
||||
# add_noise is called before the first denoising step to create the initial latent (img2img)
|
||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
|
||||
@@ -669,11 +669,7 @@ class EDMDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||
if self.begin_index is None:
|
||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||
elif self.step_index is not None:
|
||||
# add_noise is called after first denoising step (for inpainting)
|
||||
step_indices = [self.step_index] * timesteps.shape[0]
|
||||
else:
|
||||
# add_noise is called before the first denoising step to create the initial latent (img2img)
|
||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
|
||||
@@ -367,11 +367,7 @@ class EDMEulerScheduler(SchedulerMixin, ConfigMixin):
|
||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||
if self.begin_index is None:
|
||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||
elif self.step_index is not None:
|
||||
# add_noise is called after first denoising step (for inpainting)
|
||||
step_indices = [self.step_index] * timesteps.shape[0]
|
||||
else:
|
||||
# add_noise is called before the first denoising step to create the initial latent (img2img)
|
||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
|
||||
@@ -467,11 +467,7 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||
if self.begin_index is None:
|
||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||
elif self.step_index is not None:
|
||||
# add_noise is called after first denoising step (for inpainting)
|
||||
step_indices = [self.step_index] * timesteps.shape[0]
|
||||
else:
|
||||
# add_noise is called before the first denoising step to create the initial latent (img2img)
|
||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
|
||||
@@ -562,11 +562,7 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||
if self.begin_index is None:
|
||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||
elif self.step_index is not None:
|
||||
# add_noise is called after first denoising step (for inpainting)
|
||||
step_indices = [self.step_index] * timesteps.shape[0]
|
||||
else:
|
||||
# add_noise is called before the first denoising step to create the initial latent (img2img)
|
||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
|
||||
@@ -468,11 +468,7 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||
if self.begin_index is None:
|
||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||
elif self.step_index is not None:
|
||||
# add_noise is called after first denoising step (for inpainting)
|
||||
step_indices = [self.step_index] * timesteps.shape[0]
|
||||
else:
|
||||
# add_noise is called before the first denoising step to create the initial latent (img2img)
|
||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
|
||||
@@ -494,11 +494,7 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||
if self.begin_index is None:
|
||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||
elif self.step_index is not None:
|
||||
# add_noise is called after first denoising step (for inpainting)
|
||||
step_indices = [self.step_index] * timesteps.shape[0]
|
||||
else:
|
||||
# add_noise is called before the first denoising step to create the initial latent (img2img)
|
||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
|
||||
@@ -469,11 +469,7 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||
if self.begin_index is None:
|
||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||
elif self.step_index is not None:
|
||||
# add_noise is called after first denoising step (for inpainting)
|
||||
step_indices = [self.step_index] * timesteps.shape[0]
|
||||
else:
|
||||
# add_noise is called before the first denoising step to create the initial latent (img2img)
|
||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
|
||||
@@ -461,11 +461,7 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||
if self.begin_index is None:
|
||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||
elif self.step_index is not None:
|
||||
# add_noise is called after first denoising step (for inpainting)
|
||||
step_indices = [self.step_index] * timesteps.shape[0]
|
||||
else:
|
||||
# add_noise is called before the first denoising step to create the initial latent (img2img)
|
||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
|
||||
@@ -862,14 +862,10 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
schedule_timesteps = self.timesteps.to(original_samples.device)
|
||||
timesteps = timesteps.to(original_samples.device)
|
||||
|
||||
# begin_index is None when the scheduler is used for training or pipeline does not implement set_begin_index
|
||||
# begin_index is None when the scheduler is used for training
|
||||
if self.begin_index is None:
|
||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||
elif self.step_index is not None:
|
||||
# add_noise is called after first denoising step (for inpainting)
|
||||
step_indices = [self.step_index] * timesteps.shape[0]
|
||||
else:
|
||||
# add_noise is called before the first denoising step to create the initial latent (img2img)
|
||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
|
||||
@@ -158,7 +158,7 @@ class PeftLoraLoaderMixinTests:
|
||||
|
||||
pipeline_inputs = {
|
||||
"prompt": "A painting of a squirrel eating a burger",
|
||||
"num_inference_steps": 5,
|
||||
"num_inference_steps": 2,
|
||||
"guidance_scale": 6.0,
|
||||
"output_type": "np",
|
||||
}
|
||||
@@ -589,7 +589,7 @@ class PeftLoraLoaderMixinTests:
|
||||
**inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.5}
|
||||
).images
|
||||
self.assertTrue(
|
||||
not np.allclose(output_lora, output_lora_scale, atol=1e-4, rtol=1e-4),
|
||||
not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3),
|
||||
"Lora + scale should change the output",
|
||||
)
|
||||
|
||||
@@ -1300,11 +1300,6 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
|
||||
pipe.load_lora_weights(lora_id)
|
||||
pipe = pipe.to("cuda")
|
||||
|
||||
self.assertTrue(
|
||||
self.check_if_lora_correctly_set(pipe.unet),
|
||||
"Lora not correctly set in UNet",
|
||||
)
|
||||
|
||||
self.assertTrue(
|
||||
self.check_if_lora_correctly_set(pipe.text_encoder),
|
||||
"Lora not correctly set in text encoder 2",
|
||||
|
||||
@@ -211,7 +211,7 @@ class ControlNetPipelineFastTests(
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"guidance_scale": 6.0,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
"image": image,
|
||||
}
|
||||
|
||||
@@ -402,7 +402,7 @@ class StableDiffusionMultiControlNetPipelineFastTests(
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"guidance_scale": 6.0,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
"image": images,
|
||||
}
|
||||
|
||||
@@ -602,7 +602,7 @@ class StableDiffusionMultiControlNetOneModelPipelineFastTests(
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"guidance_scale": 6.0,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
"image": images,
|
||||
}
|
||||
|
||||
@@ -1092,6 +1092,13 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
|
||||
for param_name, param_value in single_file_pipe.controlnet.config.items():
|
||||
if param_name in PARAMS_TO_IGNORE:
|
||||
continue
|
||||
|
||||
# This parameter doesn't appear to be loaded from the config.
|
||||
# So when it is registered to config, it remains a tuple as this is the default in the class definition
|
||||
# from_pretrained, by contrast, does load it from the config and converts it to a list when registering to config
|
||||
if param_name == "conditioning_embedding_out_channels" and isinstance(param_value, tuple):
|
||||
param_value = list(param_value)
|
||||
|
||||
assert (
|
||||
pipe.controlnet.config[param_name] == param_value
|
||||
), f"{param_name} differs between single file loading and pretrained loading"
|
||||
|
||||
@@ -164,7 +164,7 @@ class ControlNetImg2ImgPipelineFastTests(
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"guidance_scale": 6.0,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
"image": image,
|
||||
"control_image": control_image,
|
||||
}
|
||||
@@ -313,7 +313,7 @@ class StableDiffusionMultiControlNetPipelineFastTests(
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"guidance_scale": 6.0,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
"image": image,
|
||||
"control_image": control_image,
|
||||
}
|
||||
|
||||
@@ -155,7 +155,7 @@ class ControlNetInpaintPipelineFastTests(
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"guidance_scale": 6.0,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
"image": image,
|
||||
"mask_image": mask_image,
|
||||
"control_image": control_image,
|
||||
@@ -375,7 +375,7 @@ class MultiControlNetInpaintPipelineFastTests(
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"guidance_scale": 6.0,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
"image": image,
|
||||
"mask_image": mask_image,
|
||||
"control_image": control_image,
|
||||
|
||||
@@ -172,7 +172,7 @@ class ControlNetPipelineSDXLFastTests(
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"guidance_scale": 6.0,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
"image": init_image,
|
||||
"mask_image": mask_image,
|
||||
"control_image": control_image,
|
||||
|
||||
@@ -1002,6 +1002,11 @@ class ControlNetSDXLPipelineSlowTests(unittest.TestCase):
|
||||
for param_name, param_value in single_file_pipe.unet.config.items():
|
||||
if param_name in PARAMS_TO_IGNORE:
|
||||
continue
|
||||
|
||||
# Upcast attention might be set to None in a config file, which is incorrect. It should default to False in the model
|
||||
if param_name == "upcast_attention" and pipe.unet.config[param_name] is None:
|
||||
pipe.unet.config[param_name] = False
|
||||
|
||||
assert (
|
||||
pipe.unet.config[param_name] == param_value
|
||||
), f"{param_name} differs between single file loading and pretrained loading"
|
||||
|
||||
@@ -163,7 +163,7 @@ class ControlNetPipelineSDXLImg2ImgFastTests(
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"guidance_scale": 6.0,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
"image": image,
|
||||
"control_image": image,
|
||||
}
|
||||
|
||||
@@ -63,7 +63,7 @@ class DDIMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
"batch_size": 1,
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
}
|
||||
return inputs
|
||||
|
||||
@@ -113,7 +113,7 @@ class DDIMPipelineIntegrationTests(unittest.TestCase):
|
||||
ddim.set_progress_bar_config(disable=None)
|
||||
|
||||
generator = torch.manual_seed(0)
|
||||
image = ddim(generator=generator, eta=0.0, output_type="numpy").images
|
||||
image = ddim(generator=generator, eta=0.0, output_type="np").images
|
||||
|
||||
image_slice = image[0, -3:, -3:, -1]
|
||||
|
||||
@@ -133,7 +133,7 @@ class DDIMPipelineIntegrationTests(unittest.TestCase):
|
||||
ddpm.set_progress_bar_config(disable=None)
|
||||
|
||||
generator = torch.manual_seed(0)
|
||||
image = ddpm(generator=generator, output_type="numpy").images
|
||||
image = ddpm(generator=generator, output_type="np").images
|
||||
|
||||
image_slice = image[0, -3:, -3:, -1]
|
||||
|
||||
|
||||
@@ -50,10 +50,10 @@ class DDPMPipelineFastTests(unittest.TestCase):
|
||||
ddpm.set_progress_bar_config(disable=None)
|
||||
|
||||
generator = torch.Generator(device=device).manual_seed(0)
|
||||
image = ddpm(generator=generator, num_inference_steps=2, output_type="numpy").images
|
||||
image = ddpm(generator=generator, num_inference_steps=2, output_type="np").images
|
||||
|
||||
generator = torch.Generator(device=device).manual_seed(0)
|
||||
image_from_tuple = ddpm(generator=generator, num_inference_steps=2, output_type="numpy", return_dict=False)[0]
|
||||
image_from_tuple = ddpm(generator=generator, num_inference_steps=2, output_type="np", return_dict=False)[0]
|
||||
|
||||
image_slice = image[0, -3:, -3:, -1]
|
||||
image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]
|
||||
@@ -75,10 +75,10 @@ class DDPMPipelineFastTests(unittest.TestCase):
|
||||
ddpm.set_progress_bar_config(disable=None)
|
||||
|
||||
generator = torch.manual_seed(0)
|
||||
image = ddpm(generator=generator, num_inference_steps=2, output_type="numpy").images
|
||||
image = ddpm(generator=generator, num_inference_steps=2, output_type="np").images
|
||||
|
||||
generator = torch.manual_seed(0)
|
||||
image_eps = ddpm(generator=generator, num_inference_steps=2, output_type="numpy")[0]
|
||||
image_eps = ddpm(generator=generator, num_inference_steps=2, output_type="np")[0]
|
||||
|
||||
image_slice = image[0, -3:, -3:, -1]
|
||||
image_eps_slice = image_eps[0, -3:, -3:, -1]
|
||||
@@ -102,7 +102,7 @@ class DDPMPipelineIntegrationTests(unittest.TestCase):
|
||||
ddpm.set_progress_bar_config(disable=None)
|
||||
|
||||
generator = torch.manual_seed(0)
|
||||
image = ddpm(generator=generator, output_type="numpy").images
|
||||
image = ddpm(generator=generator, output_type="np").images
|
||||
|
||||
image_slice = image[0, -3:, -3:, -1]
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ class IFPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMixin, unittest.T
|
||||
"prompt": "A painting of a squirrel eating a burger",
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
}
|
||||
|
||||
return inputs
|
||||
|
||||
@@ -55,7 +55,7 @@ class IFImg2ImgPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMixin, uni
|
||||
"image": image,
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
}
|
||||
|
||||
return inputs
|
||||
|
||||
@@ -57,7 +57,7 @@ class IFImg2ImgSuperResolutionPipelineFastTests(PipelineTesterMixin, IFPipelineT
|
||||
"original_image": original_image,
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
}
|
||||
|
||||
return inputs
|
||||
|
||||
@@ -57,7 +57,7 @@ class IFInpaintingPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMixin,
|
||||
"mask_image": mask_image,
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
}
|
||||
|
||||
return inputs
|
||||
|
||||
@@ -59,7 +59,7 @@ class IFInpaintingSuperResolutionPipelineFastTests(PipelineTesterMixin, IFPipeli
|
||||
"mask_image": mask_image,
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
}
|
||||
|
||||
return inputs
|
||||
|
||||
@@ -52,7 +52,7 @@ class IFSuperResolutionPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMi
|
||||
"image": image,
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
}
|
||||
|
||||
return inputs
|
||||
|
||||
@@ -74,7 +74,7 @@ class DiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
"class_labels": [1],
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
}
|
||||
return inputs
|
||||
|
||||
|
||||
@@ -113,7 +113,7 @@ class LDMTextToImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
"generator": generator,
|
||||
"num_inference_steps": 2,
|
||||
"guidance_scale": 6.0,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
}
|
||||
return inputs
|
||||
|
||||
@@ -153,7 +153,7 @@ class LDMTextToImagePipelineSlowTests(unittest.TestCase):
|
||||
"generator": generator,
|
||||
"num_inference_steps": 3,
|
||||
"guidance_scale": 6.0,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
}
|
||||
return inputs
|
||||
|
||||
@@ -189,7 +189,7 @@ class LDMTextToImagePipelineNightlyTests(unittest.TestCase):
|
||||
"generator": generator,
|
||||
"num_inference_steps": 50,
|
||||
"guidance_scale": 6.0,
|
||||
"output_type": "numpy",
|
||||
"output_type": "np",
|
||||
}
|
||||
return inputs
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user