Compare commits

..

1 Commits

Author SHA1 Message Date
sayakpaul 971d0cd6e6 replace with actual fix. 2024-09-23 08:18:50 +05:30
68 changed files with 283 additions and 902 deletions
-1
View File
@@ -7,7 +7,6 @@ on:
env:
DIFFUSERS_IS_CI: yes
HF_HUB_ENABLE_HF_TRANSFER: 1
HF_HOME: /mnt/cache
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
-1
View File
@@ -22,7 +22,6 @@ concurrency:
env:
DIFFUSERS_IS_CI: yes
HF_HUB_ENABLE_HF_TRANSFER: 1
OMP_NUM_THREADS: 4
MKL_NUM_THREADS: 4
PYTEST_TIMEOUT: 60
-1
View File
@@ -14,7 +14,6 @@ env:
DIFFUSERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
HF_HUB_ENABLE_HF_TRANSFER: 1
PYTEST_TIMEOUT: 600
PIPELINE_USAGE_CUTOFF: 50000
-1
View File
@@ -18,7 +18,6 @@ env:
HF_HOME: /mnt/cache
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
HF_HUB_ENABLE_HF_TRANSFER: 1
PYTEST_TIMEOUT: 600
RUN_SLOW: no
-1
View File
@@ -13,7 +13,6 @@ env:
HF_HOME: /mnt/cache
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
HF_HUB_ENABLE_HF_TRANSFER: 1
PYTEST_TIMEOUT: 600
RUN_SLOW: no
+1 -2
View File
@@ -43,7 +43,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
numpy==1.26.4 \
scipy \
tensorboard \
transformers \
hf_transfer
transformers
CMD ["/bin/bash"]
+1 -2
View File
@@ -45,7 +45,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
numpy==1.26.4 \
scipy \
tensorboard \
transformers \
hf_transfer
transformers
CMD ["/bin/bash"]
+1 -2
View File
@@ -43,7 +43,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
numpy==1.26.4 \
scipy \
tensorboard \
transformers \
hf_transfer
transformers
CMD ["/bin/bash"]
+1 -2
View File
@@ -44,7 +44,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
numpy==1.26.4 \
scipy \
tensorboard \
transformers \
hf_transfer
transformers
CMD ["/bin/bash"]
@@ -44,7 +44,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
numpy==1.26.4 \
scipy \
tensorboard \
transformers \
hf_transfer
transformers
CMD ["/bin/bash"]
+1 -2
View File
@@ -44,7 +44,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
numpy==1.26.4 \
scipy \
tensorboard \
transformers matplotlib \
hf_transfer
transformers matplotlib
CMD ["/bin/bash"]
+1 -2
View File
@@ -45,7 +45,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
scipy \
tensorboard \
transformers \
pytorch-lightning \
hf_transfer
pytorch-lightning
CMD ["/bin/bash"]
@@ -45,7 +45,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
scipy \
tensorboard \
transformers \
xformers \
hf_transfer
xformers
CMD ["/bin/bash"]
+1 -1
View File
@@ -56,7 +56,7 @@
- local: using-diffusers/overview_techniques
title: Overview
- local: training/distributed_inference
title: Distributed inference
title: Distributed inference with multiple GPUs
- local: using-diffusers/merge_loras
title: Merge LoRAs
- local: using-diffusers/scheduler_features
@@ -40,7 +40,6 @@ To generate a video from prompt, run the following Python code:
```python
import torch
from diffusers import TextToVideoZeroPipeline
import imageio
model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
@@ -45,14 +45,6 @@ Many schedulers are implemented from the [k-diffusion](https://github.com/crowso
| N/A | [`DEISMultistepScheduler`] | |
| N/A | [`UniPCMultistepScheduler`] | |
## Noise schedules and schedule types
| A1111/k-diffusion | 🤗 Diffusers |
|--------------------------|----------------------------------------------------------------------------|
| Karras | init with `use_karras_sigmas=True` |
| sgm_uniform | init with `timestep_spacing="trailing"` |
| simple | init with `timestep_spacing="trailing"` |
| exponential | init with `timestep_spacing="linspace"`, `use_exponential_sigmas=True` |
All schedulers are built from the base [`SchedulerMixin`] class which implements low level utilities shared by all schedulers.
## SchedulerMixin
-4
View File
@@ -75,8 +75,4 @@ Happy exploring, and thank you for being part of the Diffusers community!
<td><a href="https://github.com/cumulo-autumn/StreamDiffusion"> StreamDiffusion </a></td>
<td>A Pipeline-Level Solution for Real-Time Interactive Generation</td>
</tr>
<tr style="border-top: 2px solid black">
<td><a href="https://github.com/Netwrck/stable-diffusion-server"> Stable Diffusion Server </a></td>
<td>A server configured for Inpainting/Generation/img2img with one stable diffusion model</td>
</tr>
</table>
+1 -1
View File
@@ -95,7 +95,7 @@ print(f"Model downloaded at {model_path}")
Once you have downloaded a snapshot of the model, you can test it using Apple's Python script.
```shell
python -m python_coreml_stable_diffusion.pipeline --prompt "a photo of an astronaut riding a horse on mars" -i ./models/coreml-stable-diffusion-v1-4_original_packages/original/packages -o </path/to/output/image> --compute-unit CPU_AND_GPU --seed 93
python -m python_coreml_stable_diffusion.pipeline --prompt "a photo of an astronaut riding a horse on mars" -i models/coreml-stable-diffusion-v1-4_original_packages -o </path/to/output/image> --compute-unit CPU_AND_GPU --seed 93
```
Pass the path of the downloaded checkpoint with `-i` flag to the script. `--compute-unit` indicates the hardware you want to allow for inference. It must be one of the following options: `ALL`, `CPU_AND_GPU`, `CPU_ONLY`, `CPU_AND_NE`. You may also provide an optional output path, and a seed for reproducibility.
@@ -10,7 +10,7 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
specific language governing permissions and limitations under the License.
-->
# Distributed inference
# Distributed inference with multiple GPUs
On distributed setups, you can run inference across multiple GPUs with 🤗 [Accelerate](https://huggingface.co/docs/accelerate/index) or [PyTorch Distributed](https://pytorch.org/tutorials/beginner/dist_overview.html), which is useful for generating with multiple prompts in parallel.
@@ -109,131 +109,3 @@ torchrun run_distributed.py --nproc_per_node=2
> [!TIP]
> You can use `device_map` within a [`DiffusionPipeline`] to distribute its model-level components on multiple devices. Refer to the [Device placement](../tutorials/inference_with_big_models#device-placement) guide to learn more.
## Model sharding
Modern diffusion systems such as [Flux](../api/pipelines/flux) are very large and have multiple models. For example, [Flux.1-Dev](https://hf.co/black-forest-labs/FLUX.1-dev) is made up of two text encoders - [T5-XXL](https://hf.co/google/t5-v1_1-xxl) and [CLIP-L](https://hf.co/openai/clip-vit-large-patch14) - a [diffusion transformer](../api/models/flux_transformer), and a [VAE](../api/models/autoencoderkl). With a model this size, it can be challenging to run inference on consumer GPUs.
Model sharding is a technique that distributes models across GPUs when the models don't fit on a single GPU. The example below assumes two 16GB GPUs are available for inference.
Start by computing the text embeddings with the text encoders. Keep the text encoders on two GPUs by setting `device_map="balanced"`. The `balanced` strategy evenly distributes the model on all available GPUs. Use the `max_memory` parameter to allocate the maximum amount of memory for each text encoder on each GPU.
> [!TIP]
> **Only** load the text encoders for this step! The diffusion transformer and VAE are loaded in a later step to preserve memory.
```py
from diffusers import FluxPipeline
import torch
prompt = "a photo of a dog with cat-like look"
pipeline = FluxPipeline.from_pretrained(
"black-forest-labs/FLUX.1-dev",
transformer=None,
vae=None,
device_map="balanced",
max_memory={0: "16GB", 1: "16GB"},
torch_dtype=torch.bfloat16
)
with torch.no_grad():
print("Encoding prompts.")
prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt(
prompt=prompt, prompt_2=None, max_sequence_length=512
)
```
Once the text embeddings are computed, remove them from the GPU to make space for the diffusion transformer.
```py
import gc
def flush():
gc.collect()
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
del pipeline.text_encoder
del pipeline.text_encoder_2
del pipeline.tokenizer
del pipeline.tokenizer_2
del pipeline
flush()
```
Load the diffusion transformer next which has 12.5B parameters. This time, set `device_map="auto"` to automatically distribute the model across two 16GB GPUs. The `auto` strategy is backed by [Accelerate](https://hf.co/docs/accelerate/index) and available as a part of the [Big Model Inference](https://hf.co/docs/accelerate/concept_guides/big_model_inference) feature. It starts by distributing a model across the fastest device first (GPU) before moving to slower devices like the CPU and hard drive if needed. The trade-off of storing model parameters on slower devices is slower inference latency.
```py
from diffusers import FluxTransformer2DModel
import torch
transformer = FluxTransformer2DModel.from_pretrained(
"black-forest-labs/FLUX.1-dev",
subfolder="transformer",
device_map="auto",
torch_dtype=torch.bfloat16
)
```
> [!TIP]
> At any point, you can try `print(pipeline.hf_device_map)` to see how the various models are distributed across devices. This is useful for tracking the device placement of the models.
Add the transformer model to the pipeline for denoising, but set the other model-level components like the text encoders and VAE to `None` because you don't need them yet.
```py
pipeline = FluxPipeline.from_pretrained(
"black-forest-labs/FLUX.1-dev", ,
text_encoder=None,
text_encoder_2=None,
tokenizer=None,
tokenizer_2=None,
vae=None,
transformer=transformer,
torch_dtype=torch.bfloat16
)
print("Running denoising.")
height, width = 768, 1360
latents = pipeline(
prompt_embeds=prompt_embeds,
pooled_prompt_embeds=pooled_prompt_embeds,
num_inference_steps=50,
guidance_scale=3.5,
height=height,
width=width,
output_type="latent",
).images
```
Remove the pipeline and transformer from memory as they're no longer needed.
```py
del pipeline.transformer
del pipeline
flush()
```
Finally, decode the latents with the VAE into an image. The VAE is typically small enough to be loaded on a single GPU.
```py
from diffusers import AutoencoderKL
from diffusers.image_processor import VaeImageProcessor
import torch
vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=torch.bfloat16).to("cuda")
vae_scale_factor = 2 ** (len(vae.config.block_out_channels))
image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)
with torch.no_grad():
print("Running decoding.")
latents = FluxPipeline._unpack_latents(latents, height, width, vae_scale_factor)
latents = (latents / vae.config.scaling_factor) + vae.config.shift_factor
image = vae.decode(latents, return_dict=False)[0]
image = image_processor.postprocess(image, output_type="pil")
image[0].save("split_transformer.png")
```
By selectively loading and unloading the models you need at a given stage and sharding the largest models across multiple GPUs, it is possible to run inference with large models on consumer GPUs.
+4 -3
View File
@@ -171,13 +171,14 @@ def latents_to_rgb(latents):
weights = (
(60, -60, 25, -70),
(60, -5, 15, -50),
(60, 10, -5, -35),
(60, 10, -5, -35)
)
weights_tensor = torch.t(torch.tensor(weights, dtype=latents.dtype).to(latents.device))
biases_tensor = torch.tensor((150, 140, 130), dtype=latents.dtype).to(latents.device)
rgb_tensor = torch.einsum("...lxy,lr -> ...rxy", latents, weights_tensor) + biases_tensor.unsqueeze(-1).unsqueeze(-1)
image_array = rgb_tensor.clamp(0, 255).byte().cpu().numpy().transpose(1, 2, 0)
image_array = rgb_tensor.clamp(0, 255)[0].byte().cpu().numpy()
image_array = image_array.transpose(1, 2, 0)
return Image.fromarray(image_array)
```
@@ -188,7 +189,7 @@ def latents_to_rgb(latents):
def decode_tensors(pipe, step, timestep, callback_kwargs):
latents = callback_kwargs["latents"]
image = latents_to_rgb(latents[0])
image = latents_to_rgb(latents)
image.save(f"{step}.png")
return callback_kwargs
-31
View File
@@ -10,7 +10,6 @@ Please also check out our [Community Scripts](https://github.com/huggingface/dif
| Example | Description | Code Example | Colab | Author |
|:--------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------:|
|Flux with CFG|[Flux with CFG](https://github.com/ToTheBeginning/PuLID/blob/main/docs/pulid_for_flux.md) provides an implementation of using CFG in [Flux](https://blackforestlabs.ai/announcing-black-forest-labs/).|[Flux with CFG](#flux-with-cfg)|NA|[Linoy Tsaban](https://github.com/linoytsaban), [Apolinário](https://github.com/apolinario), and [Sayak Paul](https://github.com/sayakpaul)|
|Differential Diffusion|[Differential Diffusion](https://github.com/exx8/differential-diffusion) modifies an image according to a text prompt, and according to a map that specifies the amount of change in each region.|[Differential Diffusion](#differential-diffusion)|[![Hugging Face Space](https://img.shields.io/badge/🤗%20Hugging%20Face-Space-yellow)](https://huggingface.co/spaces/exx8/differential-diffusion) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/exx8/differential-diffusion/blob/main/examples/SD2.ipynb)|[Eran Levin](https://github.com/exx8) and [Ohad Fried](https://www.ohadf.com/)|
| HD-Painter | [HD-Painter](https://github.com/Picsart-AI-Research/HD-Painter) enables prompt-faithfull and high resolution (up to 2k) image inpainting upon any diffusion-based image inpainting method. | [HD-Painter](#hd-painter) | [![Hugging Face Space](https://img.shields.io/badge/🤗%20Hugging%20Face-Space-yellow)](https://huggingface.co/spaces/PAIR/HD-Painter) | [Manukyan Hayk](https://github.com/haikmanukyan) and [Sargsyan Andranik](https://github.com/AndranikSargsyan) |
| Marigold Monocular Depth Estimation | A universal monocular depth estimator, utilizing Stable Diffusion, delivering sharp predictions in the wild. (See the [project page](https://marigoldmonodepth.github.io) and [full codebase](https://github.com/prs-eth/marigold) for more details.) | [Marigold Depth Estimation](#marigold-depth-estimation) | [![Hugging Face Space](https://img.shields.io/badge/🤗%20Hugging%20Face-Space-yellow)](https://huggingface.co/spaces/toshas/marigold) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/12G8reD13DdpMie5ZQlaFNo2WCGeNUH-u?usp=sharing) | [Bingxin Ke](https://github.com/markkua) and [Anton Obukhov](https://github.com/toshas) |
@@ -83,36 +82,6 @@ pipe = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion
## Example usages
### Flux with CFG
Know more about Flux [here](https://blackforestlabs.ai/announcing-black-forest-labs/). Since Flux doesn't use CFG, this implementation provides one, inspired by the [PuLID Flux adaptation](https://github.com/ToTheBeginning/PuLID/blob/main/docs/pulid_for_flux.md).
Example usage:
```py
from diffusers import DiffusionPipeline
import torch
pipeline = DiffusionPipeline.from_pretrained(
"black-forest-labs/FLUX.1-dev",
torch_dtype=torch.bfloat16,
custom_pipeline="pipeline_flux_with_cfg"
)
pipeline.enable_model_cpu_offload()
prompt = "a watercolor painting of a unicorn"
negative_prompt = "pink"
img = pipeline(
prompt=prompt,
negative_prompt=negative_prompt,
true_cfg=1.5,
guidance_scale=3.5,
num_images_per_prompt=1,
generator=torch.manual_seed(0)
).images[0]
img.save("cfg_flux.png")
```
### Differential Diffusion
**Eran Levin, Ohad Fried**
+69 -79
View File
@@ -289,104 +289,80 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
self,
prompt: Union[str, List[str]],
prompt_2: Union[str, List[str]],
negative_prompt: Union[str, List[str]] = None,
negative_prompt_2: Union[str, List[str]] = None,
device: Optional[torch.device] = None,
num_images_per_prompt: int = 1,
prompt_embeds: Optional[torch.FloatTensor] = None,
pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
max_sequence_length: int = 512,
lora_scale: Optional[float] = None,
do_true_cfg: bool = False,
):
r"""
Args:
prompt (`str` or `List[str]`, *optional*):
prompt to be encoded
prompt_2 (`str` or `List[str]`, *optional*):
The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
used in all text-encoders
device: (`torch.device`):
torch device
num_images_per_prompt (`int`):
number of images that should be generated per prompt
prompt_embeds (`torch.FloatTensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument.
pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
If not provided, pooled text embeddings will be generated from `prompt` input argument.
lora_scale (`float`, *optional*):
A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
"""
device = device or self._execution_device
# Set LoRA scale if applicable
# set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, FluxLoraLoaderMixin):
self._lora_scale = lora_scale
# dynamically adjust the LoRA scale
if self.text_encoder is not None and USE_PEFT_BACKEND:
scale_lora_layers(self.text_encoder, lora_scale)
if self.text_encoder_2 is not None and USE_PEFT_BACKEND:
scale_lora_layers(self.text_encoder_2, lora_scale)
prompt = [prompt] if isinstance(prompt, str) else prompt
batch_size = len(prompt)
if do_true_cfg and negative_prompt is not None:
negative_prompt = [negative_prompt] if isinstance(negative_prompt, str) else negative_prompt
negative_batch_size = len(negative_prompt)
if negative_batch_size != batch_size:
raise ValueError(
f"Negative prompt batch size ({negative_batch_size}) does not match prompt batch size ({batch_size})"
)
# Concatenate prompts
prompts = prompt + negative_prompt
prompts_2 = (
prompt_2 + negative_prompt_2 if prompt_2 is not None and negative_prompt_2 is not None else None
)
else:
prompts = prompt
prompts_2 = prompt_2
if prompt_embeds is None:
if prompts_2 is None:
prompts_2 = prompts
prompt_2 = prompt_2 or prompt
prompt_2 = [prompt_2] if isinstance(prompt_2, str) else prompt_2
# Get pooled prompt embeddings from CLIPTextModel
# We only use the pooled prompt output from the CLIPTextModel
pooled_prompt_embeds = self._get_clip_prompt_embeds(
prompt=prompts,
prompt=prompt,
device=device,
num_images_per_prompt=num_images_per_prompt,
)
prompt_embeds = self._get_t5_prompt_embeds(
prompt=prompts_2,
prompt=prompt_2,
num_images_per_prompt=num_images_per_prompt,
max_sequence_length=max_sequence_length,
device=device,
)
if do_true_cfg and negative_prompt is not None:
# Split embeddings back into positive and negative parts
total_batch_size = batch_size * num_images_per_prompt
positive_indices = slice(0, total_batch_size)
negative_indices = slice(total_batch_size, 2 * total_batch_size)
positive_pooled_prompt_embeds = pooled_prompt_embeds[positive_indices]
negative_pooled_prompt_embeds = pooled_prompt_embeds[negative_indices]
positive_prompt_embeds = prompt_embeds[positive_indices]
negative_prompt_embeds = prompt_embeds[negative_indices]
pooled_prompt_embeds = positive_pooled_prompt_embeds
prompt_embeds = positive_prompt_embeds
# Unscale LoRA layers
if self.text_encoder is not None:
if isinstance(self, FluxLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale)
if self.text_encoder_2 is not None:
if isinstance(self, FluxLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder_2, lora_scale)
dtype = self.text_encoder.dtype if self.text_encoder is not None else self.transformer.dtype
text_ids = torch.zeros(prompt_embeds.shape[1], 3).to(device=device, dtype=dtype)
if do_true_cfg and negative_prompt is not None:
return (
prompt_embeds,
pooled_prompt_embeds,
text_ids,
negative_prompt_embeds,
negative_pooled_prompt_embeds,
)
else:
return prompt_embeds, pooled_prompt_embeds, text_ids, None, None
return prompt_embeds, pooled_prompt_embeds, text_ids
def check_inputs(
self,
@@ -711,33 +687,38 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
lora_scale = (
self.joint_attention_kwargs.get("scale", None) if self.joint_attention_kwargs is not None else None
)
do_true_cfg = true_cfg > 1 and negative_prompt is not None
(
prompt_embeds,
pooled_prompt_embeds,
text_ids,
negative_prompt_embeds,
negative_pooled_prompt_embeds,
) = self.encode_prompt(
prompt=prompt,
prompt_2=prompt_2,
negative_prompt=negative_prompt,
negative_prompt_2=negative_prompt_2,
prompt_embeds=prompt_embeds,
pooled_prompt_embeds=pooled_prompt_embeds,
negative_prompt_embeds=negative_prompt_embeds,
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
device=device,
num_images_per_prompt=num_images_per_prompt,
max_sequence_length=max_sequence_length,
lora_scale=lora_scale,
do_true_cfg=do_true_cfg,
)
# perform "real" CFG as suggested for distilled Flux models in https://github.com/ToTheBeginning/PuLID/blob/main/docs/pulid_for_flux.md
do_true_cfg = true_cfg > 1 and negative_prompt is not None
if do_true_cfg:
# Concatenate embeddings
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
pooled_prompt_embeds = torch.cat([negative_pooled_prompt_embeds, pooled_prompt_embeds], dim=0)
(
negative_prompt_embeds,
negative_pooled_prompt_embeds,
negative_text_ids,
) = self.encode_prompt(
prompt=negative_prompt,
prompt_2=negative_prompt_2,
prompt_embeds=negative_prompt_embeds,
pooled_prompt_embeds=negative_pooled_prompt_embeds,
device=device,
num_images_per_prompt=num_images_per_prompt,
max_sequence_length=max_sequence_length,
lora_scale=lora_scale,
)
# 4. Prepare latent variables
num_channels_latents = self.transformer.config.in_channels // 4
@@ -773,26 +754,24 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
self._num_timesteps = len(timesteps)
# handle guidance
if self.transformer.config.guidance_embeds:
guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32)
guidance = guidance.expand(latents.shape[0])
else:
guidance = None
# 6. Denoising loop
with self.progress_bar(total=num_inference_steps) as progress_bar:
for i, t in enumerate(timesteps):
if self.interrupt:
continue
latent_model_input = torch.cat([latents] * 2) if do_true_cfg else latents
# handle guidance
if self.transformer.config.guidance_embeds:
guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32)
guidance = guidance.expand(latent_model_input.shape[0])
else:
guidance = None
# broadcast to batch dimension in a way that's compatible with ONNX/Core ML
timestep = t.expand(latent_model_input.shape[0]).to(latent_model_input.dtype)
timestep = t.expand(latents.shape[0]).to(latents.dtype)
noise_pred = self.transformer(
hidden_states=latent_model_input,
hidden_states=latents,
timestep=timestep / 1000,
guidance=guidance,
pooled_projections=pooled_prompt_embeds,
@@ -804,7 +783,18 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
)[0]
if do_true_cfg:
neg_noise_pred, noise_pred = noise_pred.chunk(2)
neg_noise_pred = self.transformer(
hidden_states=latents,
timestep=timestep / 1000,
guidance=guidance,
pooled_projections=negative_pooled_prompt_embeds,
encoder_hidden_states=negative_prompt_embeds,
txt_ids=negative_text_ids,
img_ids=latent_image_ids,
joint_attention_kwargs=self.joint_attention_kwargs,
return_dict=False,
)[0]
noise_pred = neg_noise_pred + true_cfg * (noise_pred - neg_noise_pred)
# compute the previous noisy sample x_t -> x_t-1
+2 -3
View File
@@ -31,7 +31,7 @@ import torch.utils.checkpoint
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import DistributedDataParallelKwargs, ProjectConfiguration, set_seed
from accelerate.utils import ProjectConfiguration, set_seed
from datasets import load_dataset
from huggingface_hub import create_repo, upload_folder
from packaging import version
@@ -899,13 +899,12 @@ def main(args):
logging_dir = Path(args.output_dir, args.logging_dir)
accelerator_project_config = ProjectConfiguration(project_dir=args.output_dir, logging_dir=logging_dir)
kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
accelerator = Accelerator(
gradient_accumulation_steps=args.gradient_accumulation_steps,
mixed_precision=args.mixed_precision,
log_with=args.report_to,
project_config=accelerator_project_config,
kwargs_handlers=[kwargs],
)
# Disable AMP for MPS.
+3 -5
View File
@@ -14,7 +14,6 @@
# limitations under the License.
"""Conversion script for the Stable Diffusion checkpoints."""
import copy
import os
import re
from contextlib import nullcontext
@@ -92,11 +91,11 @@ DIFFUSERS_DEFAULT_PIPELINE_PATHS = {
"xl_inpaint": {"pretrained_model_name_or_path": "diffusers/stable-diffusion-xl-1.0-inpainting-0.1"},
"playground-v2-5": {"pretrained_model_name_or_path": "playgroundai/playground-v2.5-1024px-aesthetic"},
"upscale": {"pretrained_model_name_or_path": "stabilityai/stable-diffusion-x4-upscaler"},
"inpainting": {"pretrained_model_name_or_path": "stable-diffusion-v1-5/stable-diffusion-inpainting"},
"inpainting": {"pretrained_model_name_or_path": "Lykon/dreamshaper-8-inpainting"},
"inpainting_v2": {"pretrained_model_name_or_path": "stabilityai/stable-diffusion-2-inpainting"},
"controlnet": {"pretrained_model_name_or_path": "lllyasviel/control_v11p_sd15_canny"},
"v2": {"pretrained_model_name_or_path": "stabilityai/stable-diffusion-2-1"},
"v1": {"pretrained_model_name_or_path": "stable-diffusion-v1-5/stable-diffusion-v1-5"},
"v1": {"pretrained_model_name_or_path": "Lykon/dreamshaper-8"},
"stable_cascade_stage_b": {"pretrained_model_name_or_path": "stabilityai/stable-cascade", "subfolder": "decoder"},
"stable_cascade_stage_b_lite": {
"pretrained_model_name_or_path": "stabilityai/stable-cascade",
@@ -542,7 +541,6 @@ def infer_diffusers_model_type(checkpoint):
def fetch_diffusers_config(checkpoint):
model_type = infer_diffusers_model_type(checkpoint)
model_path = DIFFUSERS_DEFAULT_PIPELINE_PATHS[model_type]
model_path = copy.deepcopy(model_path)
return model_path
@@ -1513,7 +1511,7 @@ def _legacy_load_scheduler(
)
deprecate("prediction_type", "1.0.0", deprecation_message)
scheduler_config = copy.deepcopy(SCHEDULER_DEFAULT_CONFIG)
scheduler_config = SCHEDULER_DEFAULT_CONFIG
model_type = infer_diffusers_model_type(checkpoint=checkpoint)
global_step = checkpoint["global_step"] if "global_step" in checkpoint else None
+1 -1
View File
@@ -336,7 +336,7 @@ class SD3ControlNetModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
return custom_forward
ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
encoder_hidden_states, hidden_states = torch.utils.checkpoint.checkpoint(
hidden_states = torch.utils.checkpoint.checkpoint(
create_custom_forward(block),
hidden_states,
encoder_hidden_states,
+15 -11
View File
@@ -93,20 +93,24 @@ def get_parameter_device(parameter: torch.nn.Module) -> torch.device:
def get_parameter_dtype(parameter: torch.nn.Module) -> torch.dtype:
try:
return next(parameter.parameters()).dtype
params = tuple(parameter.parameters())
if len(params) > 0:
return params[0].dtype
buffers = tuple(parameter.buffers())
if len(buffers) > 0:
return buffers[0].dtype
except StopIteration:
try:
return next(parameter.buffers()).dtype
except StopIteration:
# For torch.nn.DataParallel compatibility in PyTorch 1.5
# For torch.nn.DataParallel compatibility in PyTorch 1.5
def find_tensor_attributes(module: torch.nn.Module) -> List[Tuple[str, Tensor]]:
tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)]
return tuples
def find_tensor_attributes(module: torch.nn.Module) -> List[Tuple[str, Tensor]]:
tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)]
return tuples
gen = parameter._named_members(get_members_fn=find_tensor_attributes)
first_tuple = next(gen)
return first_tuple[1].dtype
gen = parameter._named_members(get_members_fn=find_tensor_attributes)
first_tuple = next(gen)
return first_tuple[1].dtype
class ModelMixin(torch.nn.Module, PushToHubMixin):
@@ -188,9 +188,6 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
self.vae_scale_factor_temporal = (
self.vae.config.temporal_compression_ratio if hasattr(self, "vae") and self.vae is not None else 4
)
self.vae_scaling_factor_image = (
self.vae.config.scaling_factor if hasattr(self, "vae") and self.vae is not None else 0.7
)
self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)
@@ -320,12 +317,6 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
def prepare_latents(
self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
):
if isinstance(generator, list) and len(generator) != batch_size:
raise ValueError(
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
f" size of {batch_size}. Make sure the batch size matches the length of the generators."
)
shape = (
batch_size,
(num_frames - 1) // self.vae_scale_factor_temporal + 1,
@@ -333,6 +324,11 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
height // self.vae_scale_factor_spatial,
width // self.vae_scale_factor_spatial,
)
if isinstance(generator, list) and len(generator) != batch_size:
raise ValueError(
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
f" size of {batch_size}. Make sure the batch size matches the length of the generators."
)
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
@@ -345,7 +341,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
def decode_latents(self, latents: torch.Tensor) -> torch.Tensor:
latents = latents.permute(0, 2, 1, 3, 4) # [batch_size, num_channels, num_frames, height, width]
latents = 1 / self.vae_scaling_factor_image * latents
latents = 1 / self.vae.config.scaling_factor * latents
frames = self.vae.decode(latents).sample
return frames
@@ -514,10 +510,10 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
The prompt or prompts not to guide the image generation. If not defined, one has to pass
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
less than `1`).
height (`int`, *optional*, defaults to self.transformer.config.sample_height * self.vae_scale_factor_spatial):
The height in pixels of the generated image. This is set to 480 by default for the best results.
width (`int`, *optional*, defaults to self.transformer.config.sample_height * self.vae_scale_factor_spatial):
The width in pixels of the generated image. This is set to 720 by default for the best results.
height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
The height in pixels of the generated image. This is set to 1024 by default for the best results.
width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
The width in pixels of the generated image. This is set to 1024 by default for the best results.
num_frames (`int`, defaults to `48`):
Number of frames to generate. Must be divisible by self.vae_scale_factor_temporal. Generated video will
contain 1 extra frame because CogVideoX is conditioned with (num_seconds * fps + 1) frames where
@@ -591,6 +587,8 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
height = height or self.transformer.config.sample_size * self.vae_scale_factor_spatial
width = width or self.transformer.config.sample_size * self.vae_scale_factor_spatial
num_videos_per_prompt = 1
# 1. Check inputs. Raise error if not correct
@@ -207,9 +207,6 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline):
self.vae_scale_factor_temporal = (
self.vae.config.temporal_compression_ratio if hasattr(self, "vae") and self.vae is not None else 4
)
self.vae_scaling_factor_image = (
self.vae.config.scaling_factor if hasattr(self, "vae") and self.vae is not None else 0.7
)
self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)
@@ -351,12 +348,6 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline):
generator: Optional[torch.Generator] = None,
latents: Optional[torch.Tensor] = None,
):
if isinstance(generator, list) and len(generator) != batch_size:
raise ValueError(
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
f" size of {batch_size}. Make sure the batch size matches the length of the generators."
)
num_frames = (num_frames - 1) // self.vae_scale_factor_temporal + 1
shape = (
batch_size,
@@ -366,6 +357,12 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline):
width // self.vae_scale_factor_spatial,
)
if isinstance(generator, list) and len(generator) != batch_size:
raise ValueError(
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
f" size of {batch_size}. Make sure the batch size matches the length of the generators."
)
image = image.unsqueeze(2) # [B, C, F, H, W]
if isinstance(generator, list):
@@ -376,7 +373,7 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline):
image_latents = [retrieve_latents(self.vae.encode(img.unsqueeze(0)), generator) for img in image]
image_latents = torch.cat(image_latents, dim=0).to(dtype).permute(0, 2, 1, 3, 4) # [B, F, C, H, W]
image_latents = self.vae_scaling_factor_image * image_latents
image_latents = self.vae.config.scaling_factor * image_latents
padding_shape = (
batch_size,
@@ -400,7 +397,7 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline):
# Copied from diffusers.pipelines.cogvideo.pipeline_cogvideox.CogVideoXPipeline.decode_latents
def decode_latents(self, latents: torch.Tensor) -> torch.Tensor:
latents = latents.permute(0, 2, 1, 3, 4) # [batch_size, num_channels, num_frames, height, width]
latents = 1 / self.vae_scaling_factor_image * latents
latents = 1 / self.vae.config.scaling_factor * latents
frames = self.vae.decode(latents).sample
return frames
@@ -441,6 +438,7 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline):
width,
negative_prompt,
callback_on_step_end_tensor_inputs,
video=None,
latents=None,
prompt_embeds=None,
negative_prompt_embeds=None,
@@ -496,6 +494,9 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline):
f" {negative_prompt_embeds.shape}."
)
if video is not None and latents is not None:
raise ValueError("Only one of `video` or `latents` should be provided")
# Copied from diffusers.pipelines.cogvideo.pipeline_cogvideox.CogVideoXPipeline.fuse_qkv_projections
def fuse_qkv_projections(self) -> None:
r"""Enables fused QKV projections."""
@@ -583,7 +584,7 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline):
Args:
image (`PipelineImageInput`):
The input image to condition the generation on. Must be an image, a list of images or a `torch.Tensor`.
The input video to condition the generation on. Must be an image, a list of images or a `torch.Tensor`.
prompt (`str` or `List[str]`, *optional*):
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
instead.
@@ -591,10 +592,10 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline):
The prompt or prompts not to guide the image generation. If not defined, one has to pass
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
less than `1`).
height (`int`, *optional*, defaults to self.transformer.config.sample_height * self.vae_scale_factor_spatial):
The height in pixels of the generated image. This is set to 480 by default for the best results.
width (`int`, *optional*, defaults to self.transformer.config.sample_height * self.vae_scale_factor_spatial):
The width in pixels of the generated image. This is set to 720 by default for the best results.
height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
The height in pixels of the generated image. This is set to 1024 by default for the best results.
width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
The width in pixels of the generated image. This is set to 1024 by default for the best results.
num_frames (`int`, defaults to `48`):
Number of frames to generate. Must be divisible by self.vae_scale_factor_temporal. Generated video will
contain 1 extra frame because CogVideoX is conditioned with (num_seconds * fps + 1) frames where
@@ -664,19 +665,20 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline):
if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
height = height or self.transformer.config.sample_size * self.vae_scale_factor_spatial
width = width or self.transformer.config.sample_size * self.vae_scale_factor_spatial
num_videos_per_prompt = 1
# 1. Check inputs. Raise error if not correct
self.check_inputs(
image=image,
prompt=prompt,
height=height,
width=width,
negative_prompt=negative_prompt,
callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
latents=latents,
prompt_embeds=prompt_embeds,
negative_prompt_embeds=negative_prompt_embeds,
image,
prompt,
height,
width,
negative_prompt,
callback_on_step_end_tensor_inputs,
prompt_embeds,
negative_prompt_embeds,
)
self._guidance_scale = guidance_scale
self._interrupt = False
@@ -204,16 +204,12 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
self.register_modules(
tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler
)
self.vae_scale_factor_spatial = (
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
)
self.vae_scale_factor_temporal = (
self.vae.config.temporal_compression_ratio if hasattr(self, "vae") and self.vae is not None else 4
)
self.vae_scaling_factor_image = (
self.vae.config.scaling_factor if hasattr(self, "vae") and self.vae is not None else 0.7
)
self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)
@@ -355,12 +351,6 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
latents: Optional[torch.Tensor] = None,
timestep: Optional[torch.Tensor] = None,
):
if isinstance(generator, list) and len(generator) != batch_size:
raise ValueError(
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
f" size of {batch_size}. Make sure the batch size matches the length of the generators."
)
num_frames = (video.size(2) - 1) // self.vae_scale_factor_temporal + 1 if latents is None else latents.size(1)
shape = (
@@ -371,6 +361,12 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
width // self.vae_scale_factor_spatial,
)
if isinstance(generator, list) and len(generator) != batch_size:
raise ValueError(
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
f" size of {batch_size}. Make sure the batch size matches the length of the generators."
)
if latents is None:
if isinstance(generator, list):
if len(generator) != batch_size:
@@ -386,7 +382,7 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
init_latents = [retrieve_latents(self.vae.encode(vid.unsqueeze(0)), generator) for vid in video]
init_latents = torch.cat(init_latents, dim=0).to(dtype).permute(0, 2, 1, 3, 4) # [B, F, C, H, W]
init_latents = self.vae_scaling_factor_image * init_latents
init_latents = self.vae.config.scaling_factor * init_latents
noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
latents = self.scheduler.add_noise(init_latents, noise, timestep)
@@ -400,7 +396,7 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
# Copied from diffusers.pipelines.cogvideo.pipeline_cogvideox.CogVideoXPipeline.decode_latents
def decode_latents(self, latents: torch.Tensor) -> torch.Tensor:
latents = latents.permute(0, 2, 1, 3, 4) # [batch_size, num_channels, num_frames, height, width]
latents = 1 / self.vae_scaling_factor_image * latents
latents = 1 / self.vae.config.scaling_factor * latents
frames = self.vae.decode(latents).sample
return frames
@@ -593,10 +589,10 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
The prompt or prompts not to guide the image generation. If not defined, one has to pass
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
less than `1`).
height (`int`, *optional*, defaults to self.transformer.config.sample_height * self.vae_scale_factor_spatial):
The height in pixels of the generated image. This is set to 480 by default for the best results.
width (`int`, *optional*, defaults to self.transformer.config.sample_height * self.vae_scale_factor_spatial):
The width in pixels of the generated image. This is set to 720 by default for the best results.
height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
The height in pixels of the generated image. This is set to 1024 by default for the best results.
width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
The width in pixels of the generated image. This is set to 1024 by default for the best results.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
expense of slower inference.
@@ -662,20 +658,20 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
height = height or self.transformer.config.sample_size * self.vae_scale_factor_spatial
width = width or self.transformer.config.sample_size * self.vae_scale_factor_spatial
num_videos_per_prompt = 1
# 1. Check inputs. Raise error if not correct
self.check_inputs(
prompt=prompt,
height=height,
width=width,
strength=strength,
negative_prompt=negative_prompt,
callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
video=video,
latents=latents,
prompt_embeds=prompt_embeds,
negative_prompt_embeds=negative_prompt_embeds,
prompt,
height,
width,
strength,
negative_prompt,
callback_on_step_end_tensor_inputs,
prompt_embeds,
negative_prompt_embeds,
)
self._guidance_scale = guidance_scale
self._attention_kwargs = attention_kwargs
@@ -101,10 +101,10 @@ class DDPMPipeline(DiffusionPipeline):
if self.device.type == "mps":
# randn does not work reproducibly on mps
image = randn_tensor(image_shape, generator=generator, dtype=self.unet.dtype)
image = randn_tensor(image_shape, generator=generator)
image = image.to(self.device)
else:
image = randn_tensor(image_shape, generator=generator, device=self.device, dtype=self.unet.dtype)
image = randn_tensor(image_shape, generator=generator, device=self.device)
# set step values
self.scheduler.set_timesteps(num_inference_steps)
@@ -111,8 +111,6 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
the sigmas are determined according to a sequence of noise levels {σi}.
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
timestep_spacing (`str`, defaults to `"linspace"`):
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
@@ -140,12 +138,9 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
solver_type: str = "logrho",
lower_order_final: bool = True,
use_karras_sigmas: Optional[bool] = False,
use_exponential_sigmas: Optional[bool] = False,
timestep_spacing: str = "linspace",
steps_offset: int = 0,
):
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
if trained_betas is not None:
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
elif beta_schedule == "linear":
@@ -260,9 +255,6 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32)
elif self.config.use_exponential_sigmas:
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
else:
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5
@@ -374,28 +366,6 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
return sigmas
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
if hasattr(self.config, "sigma_min"):
sigma_min = self.config.sigma_min
else:
sigma_min = None
if hasattr(self.config, "sigma_max"):
sigma_max = self.config.sigma_max
else:
sigma_max = None
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
return sigmas
def convert_model_output(
self,
model_output: torch.Tensor,
@@ -161,8 +161,6 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
the sigmas are determined according to a sequence of noise levels {σi}.
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
use_lu_lambdas (`bool`, *optional*, defaults to `False`):
Whether to use the uniform-logSNR for step sizes proposed by Lu's DPM-Solver in the noise schedule during
the sampling process. If `True`, the sigmas and time steps are determined according to a sequence of
@@ -208,7 +206,6 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
lower_order_final: bool = True,
euler_at_final: bool = False,
use_karras_sigmas: Optional[bool] = False,
use_exponential_sigmas: Optional[bool] = False,
use_lu_lambdas: Optional[bool] = False,
final_sigmas_type: Optional[str] = "zero", # "zero", "sigma_min"
lambda_min_clipped: float = -float("inf"),
@@ -217,8 +214,6 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
steps_offset: int = 0,
rescale_betas_zero_snr: bool = False,
):
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
if algorithm_type in ["dpmsolver", "sde-dpmsolver"]:
deprecation_message = f"algorithm_type {algorithm_type} is deprecated and will be removed in a future version. Choose from `dpmsolver++` or `sde-dpmsolver++` instead"
deprecate("algorithm_types dpmsolver and sde-dpmsolver", "1.0.0", deprecation_message)
@@ -335,8 +330,6 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
raise ValueError("Cannot use `timesteps` with `config.use_karras_sigmas = True`")
if timesteps is not None and self.config.use_lu_lambdas:
raise ValueError("Cannot use `timesteps` with `config.use_lu_lambdas = True`")
if timesteps is not None and self.config.use_exponential_sigmas:
raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.")
if timesteps is not None:
timesteps = np.array(timesteps).astype(np.int64)
@@ -385,9 +378,6 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
lambdas = self._convert_to_lu(in_lambdas=lambdas, num_inference_steps=num_inference_steps)
sigmas = np.exp(lambdas)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
elif self.config.use_exponential_sigmas:
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
else:
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
@@ -520,28 +510,6 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
lambdas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
return lambdas
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
if hasattr(self.config, "sigma_min"):
sigma_min = self.config.sigma_min
else:
sigma_min = None
if hasattr(self.config, "sigma_max"):
sigma_max = self.config.sigma_max
else:
sigma_max = None
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
return sigmas
def convert_model_output(
self,
model_output: torch.Tensor,
@@ -124,8 +124,6 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
the sigmas are determined according to a sequence of noise levels {σi}.
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
lambda_min_clipped (`float`, defaults to `-inf`):
Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the
cosine (`squaredcos_cap_v2`) noise schedule.
@@ -160,14 +158,11 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
lower_order_final: bool = True,
euler_at_final: bool = False,
use_karras_sigmas: Optional[bool] = False,
use_exponential_sigmas: Optional[bool] = False,
lambda_min_clipped: float = -float("inf"),
variance_type: Optional[str] = None,
timestep_spacing: str = "linspace",
steps_offset: int = 0,
):
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
if algorithm_type in ["dpmsolver", "sde-dpmsolver"]:
deprecation_message = f"algorithm_type {algorithm_type} is deprecated and will be removed in a future version. Choose from `dpmsolver++` or `sde-dpmsolver++` instead"
deprecate("algorithm_types dpmsolver and sde-dpmsolver", "1.0.0", deprecation_message)
@@ -218,7 +213,6 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
self._step_index = None
self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
self.use_karras_sigmas = use_karras_sigmas
self.use_exponential_sigmas = use_exponential_sigmas
@property
def step_index(self):
@@ -273,9 +267,6 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
timesteps = timesteps.copy().astype(np.int64)
sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32)
elif self.config.use_exponential_sigmas:
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
else:
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
sigma_max = (
@@ -394,28 +385,6 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
return sigmas
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
if hasattr(self.config, "sigma_min"):
sigma_min = self.config.sigma_min
else:
sigma_min = None
if hasattr(self.config, "sigma_max"):
sigma_max = self.config.sigma_max
else:
sigma_max = None
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
return sigmas
# Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.convert_model_output
def convert_model_output(
self,
@@ -160,8 +160,6 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
the sigmas are determined according to a sequence of noise levels {σi}.
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
noise_sampler_seed (`int`, *optional*, defaults to `None`):
The random seed to use for the noise sampler. If `None`, a random seed is generated.
timestep_spacing (`str`, defaults to `"linspace"`):
@@ -184,13 +182,10 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
prediction_type: str = "epsilon",
use_karras_sigmas: Optional[bool] = False,
use_exponential_sigmas: Optional[bool] = False,
noise_sampler_seed: Optional[int] = None,
timestep_spacing: str = "linspace",
steps_offset: int = 0,
):
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
if trained_betas is not None:
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
elif beta_schedule == "linear":
@@ -346,9 +341,6 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
if self.config.use_karras_sigmas:
sigmas = self._convert_to_karras(in_sigmas=sigmas)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
elif self.config.use_exponential_sigmas:
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
second_order_timesteps = self._second_order_timesteps(sigmas, log_sigmas)
@@ -429,28 +421,6 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
return sigmas
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
if hasattr(self.config, "sigma_min"):
sigma_min = self.config.sigma_min
else:
sigma_min = None
if hasattr(self.config, "sigma_max"):
sigma_max = self.config.sigma_max
else:
sigma_max = None
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
return sigmas
@property
def state_in_first_order(self):
return self.sample is None
@@ -123,8 +123,6 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
the sigmas are determined according to a sequence of noise levels {σi}.
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
final_sigmas_type (`str`, *optional*, defaults to `"zero"`):
The final `sigma` value for the noise schedule during the sampling process. If `"sigma_min"`, the final
sigma is the same as the last sigma in the training schedule. If `zero`, the final sigma is set to 0.
@@ -156,13 +154,10 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
solver_type: str = "midpoint",
lower_order_final: bool = False,
use_karras_sigmas: Optional[bool] = False,
use_exponential_sigmas: Optional[bool] = False,
final_sigmas_type: Optional[str] = "zero", # "zero", "sigma_min"
lambda_min_clipped: float = -float("inf"),
variance_type: Optional[str] = None,
):
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
if algorithm_type == "dpmsolver":
deprecation_message = "algorithm_type `dpmsolver` is deprecated and will be removed in a future version. Choose from `dpmsolver++` or `sde-dpmsolver++` instead"
deprecate("algorithm_types=dpmsolver", "1.0.0", deprecation_message)
@@ -305,8 +300,6 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
raise ValueError("Must pass exactly one of `num_inference_steps` or `timesteps`.")
if timesteps is not None and self.config.use_karras_sigmas:
raise ValueError("Cannot use `timesteps` when `config.use_karras_sigmas=True`.")
if timesteps is not None and self.config.use_exponential_sigmas:
raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.")
num_inference_steps = num_inference_steps or len(timesteps)
self.num_inference_steps = num_inference_steps
@@ -330,9 +323,6 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
sigmas = np.flip(sigmas).copy()
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
elif self.config.use_exponential_sigmas:
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
else:
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
@@ -462,28 +452,6 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
return sigmas
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
if hasattr(self.config, "sigma_min"):
sigma_min = self.config.sigma_min
else:
sigma_min = None
if hasattr(self.config, "sigma_max"):
sigma_max = self.config.sigma_max
else:
sigma_max = None
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
return sigmas
def convert_model_output(
self,
model_output: torch.Tensor,
@@ -158,8 +158,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
the sigmas are determined according to a sequence of noise levels {σi}.
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
timestep_spacing (`str`, defaults to `"linspace"`):
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
@@ -188,7 +186,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
prediction_type: str = "epsilon",
interpolation_type: str = "linear",
use_karras_sigmas: Optional[bool] = False,
use_exponential_sigmas: Optional[bool] = False,
sigma_min: Optional[float] = None,
sigma_max: Optional[float] = None,
timestep_spacing: str = "linspace",
@@ -197,8 +194,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
rescale_betas_zero_snr: bool = False,
final_sigmas_type: str = "zero", # can be "zero" or "sigma_min"
):
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
if trained_betas is not None:
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
elif beta_schedule == "linear":
@@ -240,7 +235,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
self.is_scale_input_called = False
self.use_karras_sigmas = use_karras_sigmas
self.use_exponential_sigmas = use_exponential_sigmas
self._step_index = None
self._begin_index = None
@@ -338,8 +332,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
raise ValueError("Can only pass one of `num_inference_steps` or `timesteps` or `sigmas`.")
if timesteps is not None and self.config.use_karras_sigmas:
raise ValueError("Cannot set `timesteps` with `config.use_karras_sigmas = True`.")
if timesteps is not None and self.config.use_exponential_sigmas:
raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.")
if (
timesteps is not None
and self.config.timestep_type == "continuous"
@@ -404,10 +396,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
elif self.config.use_exponential_sigmas:
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
if self.config.final_sigmas_type == "sigma_min":
sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5
elif self.config.final_sigmas_type == "zero":
@@ -480,28 +468,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
return sigmas
# Copied from https://github.com/crowsonkb/k-diffusion/blob/686dbad0f39640ea25c8a8c6a6e56bb40eacefa2/k_diffusion/sampling.py#L26
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
if hasattr(self.config, "sigma_min"):
sigma_min = self.config.sigma_min
else:
sigma_min = None
if hasattr(self.config, "sigma_max"):
sigma_max = self.config.sigma_max
else:
sigma_max = None
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
return sigmas
def index_for_timestep(self, timestep, schedule_timesteps=None):
if schedule_timesteps is None:
schedule_timesteps = self.timesteps
@@ -97,8 +97,6 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
the sigmas are determined according to a sequence of noise levels {σi}.
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
timestep_spacing (`str`, defaults to `"linspace"`):
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
@@ -119,14 +117,11 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
prediction_type: str = "epsilon",
use_karras_sigmas: Optional[bool] = False,
use_exponential_sigmas: Optional[bool] = False,
clip_sample: Optional[bool] = False,
clip_sample_range: float = 1.0,
timestep_spacing: str = "linspace",
steps_offset: int = 0,
):
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
if trained_betas is not None:
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
elif beta_schedule == "linear":
@@ -256,8 +251,6 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
raise ValueError("Can only pass one of `num_inference_steps` or `custom_timesteps`.")
if timesteps is not None and self.config.use_karras_sigmas:
raise ValueError("Cannot use `timesteps` with `config.use_karras_sigmas = True`")
if timesteps is not None and self.config.use_exponential_sigmas:
raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.")
num_inference_steps = num_inference_steps or len(timesteps)
self.num_inference_steps = num_inference_steps
@@ -293,9 +286,6 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
if self.config.use_karras_sigmas:
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
elif self.config.use_exponential_sigmas:
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
sigmas = torch.from_numpy(sigmas).to(device=device)
@@ -364,28 +354,6 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
return sigmas
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
if hasattr(self.config, "sigma_min"):
sigma_min = self.config.sigma_min
else:
sigma_min = None
if hasattr(self.config, "sigma_max"):
sigma_max = self.config.sigma_max
else:
sigma_max = None
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
return sigmas
@property
def state_in_first_order(self):
return self.dt is None
@@ -91,8 +91,6 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
the sigmas are determined according to a sequence of noise levels {σi}.
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
prediction_type (`str`, defaults to `epsilon`, *optional*):
Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
`sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
@@ -116,13 +114,10 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
beta_schedule: str = "linear",
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
use_karras_sigmas: Optional[bool] = False,
use_exponential_sigmas: Optional[bool] = False,
prediction_type: str = "epsilon",
timestep_spacing: str = "linspace",
steps_offset: int = 0,
):
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
if trained_betas is not None:
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
elif beta_schedule == "linear":
@@ -255,9 +250,6 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
if self.config.use_karras_sigmas:
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
elif self.config.use_exponential_sigmas:
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
self.log_sigmas = torch.from_numpy(log_sigmas).to(device)
sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
@@ -354,28 +346,6 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
return sigmas
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
if hasattr(self.config, "sigma_min"):
sigma_min = self.config.sigma_min
else:
sigma_min = None
if hasattr(self.config, "sigma_max"):
sigma_max = self.config.sigma_max
else:
sigma_max = None
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
return sigmas
@property
def state_in_first_order(self):
return self.sample is None
@@ -90,8 +90,6 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
the sigmas are determined according to a sequence of noise levels {σi}.
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
prediction_type (`str`, defaults to `epsilon`, *optional*):
Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
`sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
@@ -115,13 +113,10 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
beta_schedule: str = "linear",
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
use_karras_sigmas: Optional[bool] = False,
use_exponential_sigmas: Optional[bool] = False,
prediction_type: str = "epsilon",
timestep_spacing: str = "linspace",
steps_offset: int = 0,
):
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
if trained_betas is not None:
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
elif beta_schedule == "linear":
@@ -254,9 +249,6 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
if self.config.use_karras_sigmas:
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
elif self.config.use_exponential_sigmas:
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
self.log_sigmas = torch.from_numpy(log_sigmas).to(device=device)
sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
@@ -367,28 +359,6 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
return sigmas
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
if hasattr(self.config, "sigma_min"):
sigma_min = self.config.sigma_min
else:
sigma_min = None
if hasattr(self.config, "sigma_max"):
sigma_max = self.config.sigma_max
else:
sigma_max = None
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
return sigmas
def step(
self,
model_output: Union[torch.Tensor, np.ndarray],
@@ -111,8 +111,6 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
the sigmas are determined according to a sequence of noise levels {σi}.
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
prediction_type (`str`, defaults to `epsilon`, *optional*):
Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
`sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
@@ -136,13 +134,10 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
beta_schedule: str = "linear",
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
use_karras_sigmas: Optional[bool] = False,
use_exponential_sigmas: Optional[bool] = False,
prediction_type: str = "epsilon",
timestep_spacing: str = "linspace",
steps_offset: int = 0,
):
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
if trained_betas is not None:
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
elif beta_schedule == "linear":
@@ -294,9 +289,6 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
if self.config.use_karras_sigmas:
sigmas = self._convert_to_karras(in_sigmas=sigmas)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
elif self.config.use_exponential_sigmas:
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
@@ -370,28 +362,6 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
return sigmas
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
if hasattr(self.config, "sigma_min"):
sigma_min = self.config.sigma_min
else:
sigma_min = None
if hasattr(self.config, "sigma_max"):
sigma_max = self.config.sigma_max
else:
sigma_max = None
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
return sigmas
def step(
self,
model_output: torch.Tensor,
@@ -122,8 +122,6 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
the sigmas are determined according to a sequence of noise levels {σi}.
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
lambda_min_clipped (`float`, defaults to `-inf`):
Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the
cosine (`squaredcos_cap_v2`) noise schedule.
@@ -158,14 +156,11 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
algorithm_type: str = "data_prediction",
lower_order_final: bool = True,
use_karras_sigmas: Optional[bool] = False,
use_exponential_sigmas: Optional[bool] = False,
lambda_min_clipped: float = -float("inf"),
variance_type: Optional[str] = None,
timestep_spacing: str = "linspace",
steps_offset: int = 0,
):
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
if trained_betas is not None:
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
elif beta_schedule == "linear":
@@ -289,9 +284,6 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32)
elif self.config.use_exponential_sigmas:
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
else:
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5
@@ -403,28 +395,6 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
return sigmas
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
if hasattr(self.config, "sigma_min"):
sigma_min = self.config.sigma_min
else:
sigma_min = None
if hasattr(self.config, "sigma_max"):
sigma_max = self.config.sigma_max
else:
sigma_max = None
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
return sigmas
def convert_model_output(
self,
model_output: torch.Tensor,
@@ -159,8 +159,6 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
the sigmas are determined according to a sequence of noise levels {σi}.
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
timestep_spacing (`str`, defaults to `"linspace"`):
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
@@ -197,14 +195,11 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
disable_corrector: List[int] = [],
solver_p: SchedulerMixin = None,
use_karras_sigmas: Optional[bool] = False,
use_exponential_sigmas: Optional[bool] = False,
timestep_spacing: str = "linspace",
steps_offset: int = 0,
final_sigmas_type: Optional[str] = "zero", # "zero", "sigma_min"
rescale_betas_zero_snr: bool = False,
):
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
if trained_betas is not None:
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
elif beta_schedule == "linear":
@@ -334,9 +329,6 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}"
)
sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32)
elif self.config.use_exponential_sigmas:
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
else:
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
if self.config.final_sigmas_type == "sigma_min":
@@ -458,28 +450,6 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
return sigmas
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
if hasattr(self.config, "sigma_min"):
sigma_min = self.config.sigma_min
else:
sigma_min = None
if hasattr(self.config, "sigma_max"):
sigma_max = self.config.sigma_max
else:
sigma_max = None
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
return sigmas
def convert_model_output(
self,
model_output: torch.Tensor,
+25 -29
View File
@@ -102,7 +102,7 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
@slow
@require_torch_gpu
def test_integration_move_lora_cpu(self):
path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
path = "Jiali/stable-diffusion-1.5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
@@ -161,7 +161,7 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
def test_integration_move_lora_dora_cpu(self):
from peft import LoraConfig
path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
path = "Jiali/stable-diffusion-1.5"
unet_lora_config = LoraConfig(
init_lora_weights="gaussian",
target_modules=["to_k", "to_q", "to_v", "to_out.0"],
@@ -221,7 +221,7 @@ class LoraIntegrationTests(unittest.TestCase):
torch.cuda.empty_cache()
def test_integration_logits_with_scale(self):
path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
path = "Jiali/stable-diffusion-1.5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32)
@@ -253,7 +253,7 @@ class LoraIntegrationTests(unittest.TestCase):
release_memory(pipe)
def test_integration_logits_no_scale(self):
path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
path = "Jiali/stable-diffusion-1.5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32)
@@ -284,7 +284,7 @@ class LoraIntegrationTests(unittest.TestCase):
lora_model_id = "hf-internal-testing/lora_dreambooth_dog_example"
base_model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
base_model_id = "Jiali/stable-diffusion-1.5"
pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
pipe = pipe.to(torch_device)
@@ -308,7 +308,7 @@ class LoraIntegrationTests(unittest.TestCase):
lora_model_id = "hf-internal-testing/lora-trained"
base_model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
base_model_id = "Jiali/stable-diffusion-1.5"
pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
pipe = pipe.to(torch_device)
@@ -419,9 +419,9 @@ class LoraIntegrationTests(unittest.TestCase):
def test_kohya_sd_v15_with_higher_dimensions(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
).to(torch_device)
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None).to(
torch_device
)
lora_model_id = "hf-internal-testing/urushisato-lora"
lora_filename = "urushisato_v15.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
@@ -444,7 +444,7 @@ class LoraIntegrationTests(unittest.TestCase):
lora_model_id = "hf-internal-testing/sd-model-finetuned-lora-t4"
base_model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
base_model_id = "Jiali/stable-diffusion-1.5"
pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
pipe = pipe.to(torch_device)
@@ -467,9 +467,9 @@ class LoraIntegrationTests(unittest.TestCase):
prompt = "masterpiece, best quality, mountain"
num_inference_steps = 2
pipe = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
).to(torch_device)
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None).to(
torch_device
)
initial_images = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
@@ -505,9 +505,9 @@ class LoraIntegrationTests(unittest.TestCase):
prompt = "masterpiece, best quality, mountain"
num_inference_steps = 2
pipe = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
).to(torch_device)
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None).to(
torch_device
)
initial_images = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
@@ -547,9 +547,9 @@ class LoraIntegrationTests(unittest.TestCase):
def test_not_empty_state_dict(self):
# Makes sure https://github.com/huggingface/diffusers/issues/7054 does not happen again
pipe = AutoPipelineForText2Image.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
).to(torch_device)
pipe = AutoPipelineForText2Image.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16).to(
torch_device
)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
cached_file = hf_hub_download("hf-internal-testing/lcm-lora-test-sd-v1-5", "test_lora.safetensors")
@@ -561,9 +561,9 @@ class LoraIntegrationTests(unittest.TestCase):
def test_load_unload_load_state_dict(self):
# Makes sure https://github.com/huggingface/diffusers/issues/7054 does not happen again
pipe = AutoPipelineForText2Image.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
).to(torch_device)
pipe = AutoPipelineForText2Image.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16).to(
torch_device
)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
cached_file = hf_hub_download("hf-internal-testing/lcm-lora-test-sd-v1-5", "test_lora.safetensors")
@@ -580,9 +580,7 @@ class LoraIntegrationTests(unittest.TestCase):
release_memory(pipe)
def test_sdv1_5_lcm_lora(self):
pipe = DiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pipe = DiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16)
pipe.to(torch_device)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
@@ -610,9 +608,7 @@ class LoraIntegrationTests(unittest.TestCase):
release_memory(pipe)
def test_sdv1_5_lcm_lora_img2img(self):
pipe = AutoPipelineForImage2Image.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pipe = AutoPipelineForImage2Image.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16)
pipe.to(torch_device)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
@@ -653,7 +649,7 @@ class LoraIntegrationTests(unittest.TestCase):
This test simply checks that loading a LoRA with an empty network alpha works fine
See: https://github.com/huggingface/diffusers/issues/5606
"""
pipeline = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
pipeline = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
pipeline.enable_sequential_cpu_offload()
civitai_path = hf_hub_download("ybelkada/test-ahi-civitai", "ahi_lora_weights.safetensors")
pipeline.load_lora_weights(civitai_path, adapter_name="ahri")
+3 -5
View File
@@ -1051,9 +1051,7 @@ class ConsistencyDecoderVAEIntegrationTests(unittest.TestCase):
def test_sd(self):
vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder") # TODO - update
pipe = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", vae=vae, safety_checker=None
)
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", vae=vae, safety_checker=None)
pipe.to(torch_device)
out = pipe(
@@ -1101,7 +1099,7 @@ class ConsistencyDecoderVAEIntegrationTests(unittest.TestCase):
"openai/consistency-decoder", torch_dtype=torch.float16
) # TODO - update
pipe = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
"Jiali/stable-diffusion-1.5",
torch_dtype=torch.float16,
vae=vae,
safety_checker=None,
@@ -1126,7 +1124,7 @@ class ConsistencyDecoderVAEIntegrationTests(unittest.TestCase):
def test_vae_tiling(self):
vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder", torch_dtype=torch.float16)
pipe = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", vae=vae, safety_checker=None, torch_dtype=torch.float16
"Jiali/stable-diffusion-1.5", vae=vae, safety_checker=None, torch_dtype=torch.float16
)
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
+14 -16
View File
@@ -73,7 +73,7 @@ def _test_stable_diffusion_compile(in_queue, out_queue, timeout):
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.to("cuda")
pipe.set_progress_bar_config(disable=None)
@@ -715,7 +715,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
@@ -742,7 +742,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-depth")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
@@ -769,7 +769,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-hed")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
@@ -796,7 +796,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-mlsd")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
@@ -823,7 +823,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-normal")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
@@ -850,7 +850,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-openpose")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
@@ -877,7 +877,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-scribble")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
@@ -904,7 +904,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-seg")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
@@ -935,7 +935,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-seg")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing()
@@ -961,7 +961,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
@@ -993,7 +993,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()
@@ -1035,7 +1035,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11e_sd15_shuffle")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
@@ -1081,9 +1081,7 @@ class StableDiffusionMultiControlNetPipelineSlowTests(unittest.TestCase):
controlnet_pose = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-openpose")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
safety_checker=None,
controlnet=[controlnet_pose, controlnet_canny],
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=[controlnet_pose, controlnet_canny]
)
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
@@ -407,7 +407,7 @@ class ControlNetImg2ImgPipelineSlowTests(unittest.TestCase):
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
@@ -504,7 +504,7 @@ class ControlNetInpaintPipelineSlowTests(unittest.TestCase):
controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_inpaint")
pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
)
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()
@@ -41,7 +41,7 @@ class FlaxControlNetPipelineIntegrationTests(unittest.TestCase):
"lllyasviel/sd-controlnet-canny", from_pt=True, dtype=jnp.bfloat16
)
pipe, params = FlaxStableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", controlnet=controlnet, from_pt=True, dtype=jnp.bfloat16
"Jiali/stable-diffusion-1.5", controlnet=controlnet, from_pt=True, dtype=jnp.bfloat16
)
params["controlnet"] = controlnet_params
@@ -86,7 +86,7 @@ class FlaxControlNetPipelineIntegrationTests(unittest.TestCase):
"lllyasviel/sd-controlnet-openpose", from_pt=True, dtype=jnp.bfloat16
)
pipe, params = FlaxStableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", controlnet=controlnet, from_pt=True, dtype=jnp.bfloat16
"Jiali/stable-diffusion-1.5", controlnet=controlnet, from_pt=True, dtype=jnp.bfloat16
)
params["controlnet"] = controlnet_params
@@ -170,10 +170,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
def test_text_to_image(self):
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
pipeline = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
image_encoder=image_encoder,
safety_checker=None,
torch_dtype=self.dtype,
"Jiali/stable-diffusion-1.5", image_encoder=image_encoder, safety_checker=None, torch_dtype=self.dtype
)
pipeline.to(torch_device)
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
@@ -203,10 +200,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
def test_image_to_image(self):
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
pipeline = StableDiffusionImg2ImgPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
image_encoder=image_encoder,
safety_checker=None,
torch_dtype=self.dtype,
"Jiali/stable-diffusion-1.5", image_encoder=image_encoder, safety_checker=None, torch_dtype=self.dtype
)
pipeline.to(torch_device)
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
@@ -238,10 +232,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
def test_inpainting(self):
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
pipeline = StableDiffusionInpaintPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
image_encoder=image_encoder,
safety_checker=None,
torch_dtype=self.dtype,
"Jiali/stable-diffusion-1.5", image_encoder=image_encoder, safety_checker=None, torch_dtype=self.dtype
)
pipeline.to(torch_device)
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
@@ -269,10 +260,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
def test_text_to_image_model_cpu_offload(self):
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
pipeline = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
image_encoder=image_encoder,
safety_checker=None,
torch_dtype=self.dtype,
"Jiali/stable-diffusion-1.5", image_encoder=image_encoder, safety_checker=None, torch_dtype=self.dtype
)
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
pipeline.to(torch_device)
@@ -299,10 +287,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
def test_text_to_image_full_face(self):
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
pipeline = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
image_encoder=image_encoder,
safety_checker=None,
torch_dtype=self.dtype,
"Jiali/stable-diffusion-1.5", image_encoder=image_encoder, safety_checker=None, torch_dtype=self.dtype
)
pipeline.to(torch_device)
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-full-face_sd15.bin")
@@ -319,10 +304,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
def test_unload(self):
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
pipeline = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
image_encoder=image_encoder,
safety_checker=None,
torch_dtype=self.dtype,
"Jiali/stable-diffusion-1.5", image_encoder=image_encoder, safety_checker=None, torch_dtype=self.dtype
)
before_processors = [attn_proc.__class__ for attn_proc in pipeline.unet.attn_processors.values()]
pipeline.to(torch_device)
@@ -341,10 +323,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
def test_multi(self):
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
pipeline = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
image_encoder=image_encoder,
safety_checker=None,
torch_dtype=self.dtype,
"Jiali/stable-diffusion-1.5", image_encoder=image_encoder, safety_checker=None, torch_dtype=self.dtype
)
pipeline.to(torch_device)
pipeline.load_ip_adapter(
@@ -364,7 +343,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
def test_text_to_image_face_id(self):
pipeline = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, torch_dtype=self.dtype
"Jiali/stable-diffusion-1.5", safety_checker=None, torch_dtype=self.dtype
)
pipeline.to(torch_device)
pipeline.load_ip_adapter(
@@ -224,7 +224,7 @@ class LEditsPPPipelineStableDiffusionSlowTests(unittest.TestCase):
def test_ledits_pp_editing(self):
pipe = LEditsPPPipelineStableDiffusion.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, torch_dtype=torch.float16
"Jiali/stable-diffusion-1.5", safety_checker=None, torch_dtype=torch.float16
)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
@@ -34,19 +34,19 @@ class LuminaText2ImgPipelinePipelineFastTests(unittest.TestCase, PipelineTesterM
def get_dummy_components(self):
torch.manual_seed(0)
transformer = LuminaNextDiT2DModel(
sample_size=4,
sample_size=16,
patch_size=2,
in_channels=4,
hidden_size=4,
hidden_size=24,
num_layers=2,
num_attention_heads=1,
num_attention_heads=3,
num_kv_heads=1,
multiple_of=16,
ffn_dim_multiplier=None,
norm_eps=1e-5,
learn_sigma=True,
qk_norm=True,
cross_attention_dim=8,
cross_attention_dim=32,
scaling_factor=1.0,
)
torch.manual_seed(0)
@@ -57,8 +57,8 @@ class LuminaText2ImgPipelinePipelineFastTests(unittest.TestCase, PipelineTesterM
torch.manual_seed(0)
config = GemmaConfig(
head_dim=2,
hidden_size=8,
head_dim=4,
hidden_size=32,
intermediate_size=37,
num_attention_heads=4,
num_hidden_layers=2,
+1 -1
View File
@@ -283,7 +283,7 @@ class StableDiffusionPAGPipelineFastTests(
@require_torch_gpu
class StableDiffusionPAGPipelineIntegrationTests(unittest.TestCase):
pipeline_class = StableDiffusionPAGPipeline
repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
repo_id = "Jiali/stable-diffusion-1.5"
def setUp(self):
super().setUp()
@@ -287,7 +287,7 @@ class SemanticDiffusionPipelineIntegrationTests(unittest.TestCase):
def test_positive_guidance(self):
torch_device = "cuda"
pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
@@ -370,7 +370,7 @@ class SemanticDiffusionPipelineIntegrationTests(unittest.TestCase):
def test_negative_guidance(self):
torch_device = "cuda"
pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
@@ -453,7 +453,7 @@ class SemanticDiffusionPipelineIntegrationTests(unittest.TestCase):
def test_multi_cond_guidance(self):
torch_device = "cuda"
pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
@@ -536,9 +536,7 @@ class SemanticDiffusionPipelineIntegrationTests(unittest.TestCase):
def test_guidance_fp16(self):
torch_device = "cuda"
pipe = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
@@ -250,10 +250,10 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
def test_inference_ddim(self):
ddim_scheduler = DDIMScheduler.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="scheduler", revision="onnx"
"Jiali/stable-diffusion-1.5", subfolder="scheduler", revision="onnx"
)
sd_pipe = OnnxStableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
"Jiali/stable-diffusion-1.5",
revision="onnx",
scheduler=ddim_scheduler,
safety_checker=None,
@@ -276,10 +276,10 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
def test_inference_k_lms(self):
lms_scheduler = LMSDiscreteScheduler.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="scheduler", revision="onnx"
"Jiali/stable-diffusion-1.5", subfolder="scheduler", revision="onnx"
)
sd_pipe = OnnxStableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
"Jiali/stable-diffusion-1.5",
revision="onnx",
scheduler=lms_scheduler,
safety_checker=None,
@@ -327,7 +327,7 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
test_callback_fn.has_been_called = False
pipe = OnnxStableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
"Jiali/stable-diffusion-1.5",
revision="onnx",
safety_checker=None,
feature_extractor=None,
@@ -352,7 +352,7 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
def test_stable_diffusion_no_safety_checker(self):
pipe = OnnxStableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
"Jiali/stable-diffusion-1.5",
revision="onnx",
safety_checker=None,
feature_extractor=None,
@@ -210,10 +210,10 @@ class OnnxStableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase):
)
init_image = init_image.resize((768, 512))
lms_scheduler = LMSDiscreteScheduler.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="scheduler", revision="onnx"
"Jiali/stable-diffusion-1.5", subfolder="scheduler", revision="onnx"
)
pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
"Jiali/stable-diffusion-1.5",
revision="onnx",
scheduler=lms_scheduler,
safety_checker=None,
@@ -1332,7 +1332,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
def test_download_from_hub(self):
ckpt_paths = [
"https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors",
"https://huggingface.co/Jiali/stable-diffusion-1.5/blob/main/v1-5-pruned-emaonly.safetensors",
"https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix.safetensors",
]
@@ -1346,10 +1346,8 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
assert image_out.shape == (512, 512, 3)
def test_download_local(self):
ckpt_filename = hf_hub_download(
"stable-diffusion-v1-5/stable-diffusion-v1-5", filename="v1-5-pruned-emaonly.safetensors"
)
config_filename = hf_hub_download("stable-diffusion-v1-5/stable-diffusion-v1-5", filename="v1-inference.yaml")
ckpt_filename = hf_hub_download("Jiali/stable-diffusion-1.5", filename="v1-5-pruned-emaonly.safetensors")
config_filename = hf_hub_download("Jiali/stable-diffusion-1.5", filename="v1-inference.yaml")
pipe = StableDiffusionPipeline.from_single_file(
ckpt_filename, config_files={"v1": config_filename}, torch_dtype=torch.float16
@@ -1404,9 +1402,7 @@ class StableDiffusionPipelineNightlyTests(unittest.TestCase):
assert max_diff < 1e-3
def test_stable_diffusion_1_5_pndm(self):
sd_pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5").to(
torch_device
)
sd_pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5").to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
inputs = self.get_inputs(torch_device)
@@ -1487,9 +1483,9 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
return inputs
def get_pipeline_output_without_device_map(self):
sd_pipe = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
).to(torch_device)
sd_pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16).to(
torch_device
)
sd_pipe.set_progress_bar_config(disable=True)
inputs = self.get_inputs()
no_device_map_image = sd_pipe(**inputs).images
@@ -1502,7 +1498,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
no_device_map_image = self.get_pipeline_output_without_device_map()
sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16
"Jiali/stable-diffusion-1.5", device_map="balanced", torch_dtype=torch.float16
)
sd_pipe_with_device_map.set_progress_bar_config(disable=True)
inputs = self.get_inputs()
@@ -1513,7 +1509,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
def test_components_put_in_right_devices(self):
sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16
"Jiali/stable-diffusion-1.5", device_map="balanced", torch_dtype=torch.float16
)
assert len(set(sd_pipe_with_device_map.hf_device_map.values())) >= 2
@@ -1522,7 +1518,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
no_device_map_image = self.get_pipeline_output_without_device_map()
sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5",
"Jiali/stable-diffusion-1.5",
device_map="balanced",
max_memory={0: "1GB", 1: "1GB"},
torch_dtype=torch.float16,
@@ -1536,7 +1532,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
def test_reset_device_map(self):
sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16
"Jiali/stable-diffusion-1.5", device_map="balanced", torch_dtype=torch.float16
)
sd_pipe_with_device_map.reset_device_map()
@@ -1548,7 +1544,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
def test_reset_device_map_to(self):
sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16
"Jiali/stable-diffusion-1.5", device_map="balanced", torch_dtype=torch.float16
)
sd_pipe_with_device_map.reset_device_map()
@@ -1560,7 +1556,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
def test_reset_device_map_enable_model_cpu_offload(self):
sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16
"Jiali/stable-diffusion-1.5", device_map="balanced", torch_dtype=torch.float16
)
sd_pipe_with_device_map.reset_device_map()
@@ -1572,7 +1568,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
def test_reset_device_map_enable_sequential_cpu_offload(self):
sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16
"Jiali/stable-diffusion-1.5", device_map="balanced", torch_dtype=torch.float16
)
sd_pipe_with_device_map.reset_device_map()
@@ -566,7 +566,7 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
assert module.device == torch.device("cpu")
def test_img2img_2nd_order(self):
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
sd_pipe.scheduler = HeunDiscreteScheduler.from_config(sd_pipe.scheduler.config)
sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
@@ -630,7 +630,7 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3
def test_img2img_safety_checker_works(self):
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
@@ -686,7 +686,7 @@ class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
return inputs
def test_img2img_pndm(self):
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
@@ -701,7 +701,7 @@ class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
assert max_diff < 1e-3
def test_img2img_ddim(self):
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
sd_pipe.scheduler = DDIMScheduler.from_config(sd_pipe.scheduler.config)
sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
@@ -717,7 +717,7 @@ class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
assert max_diff < 1e-3
def test_img2img_lms(self):
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
sd_pipe.scheduler = LMSDiscreteScheduler.from_config(sd_pipe.scheduler.config)
sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
@@ -733,7 +733,7 @@ class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
assert max_diff < 1e-3
def test_img2img_dpm(self):
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
sd_pipe.scheduler = DPMSolverMultistepScheduler.from_config(sd_pipe.scheduler.config)
sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
@@ -767,9 +767,7 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
assert np.abs(expected_slice - image_slice).max() < 1e-3
def test_stable_diffusion_simple_inpaint_ddim(self):
pipe = StableDiffusionInpaintPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
)
pipe = StableDiffusionInpaintPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None)
pipe.unet.set_default_attn_processor()
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
@@ -975,9 +973,7 @@ class StableDiffusionInpaintPipelineAsymmetricAutoencoderKLSlowTests(unittest.Te
def test_stable_diffusion_simple_inpaint_ddim(self):
vae = AsymmetricAutoencoderKL.from_pretrained("cross-attention/asymmetric-autoencoder-kl-x-1-5")
pipe = StableDiffusionInpaintPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
)
pipe = StableDiffusionInpaintPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None)
pipe.vae = vae
pipe.unet.set_default_attn_processor()
pipe.to(torch_device)
@@ -609,7 +609,7 @@ class StableDiffusionAdapterPipelineSlowTests(unittest.TestCase):
def test_stable_diffusion_adapter_depth_sd_v15(self):
adapter_model = "TencentARC/t2iadapter_depth_sd15v2"
sd_model = "stable-diffusion-v1-5/stable-diffusion-v1-5"
sd_model = "Jiali/stable-diffusion-1.5"
prompt = "desk"
image_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/desk_depth.png"
input_channels = 3
@@ -636,7 +636,7 @@ class StableDiffusionAdapterPipelineSlowTests(unittest.TestCase):
def test_stable_diffusion_adapter_zoedepth_sd_v15(self):
adapter_model = "TencentARC/t2iadapter_zoedepth_sd15v1"
sd_model = "stable-diffusion-v1-5/stable-diffusion-v1-5"
sd_model = "Jiali/stable-diffusion-1.5"
prompt = "motorcycle"
image_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/motorcycle.png"
input_channels = 3
@@ -660,7 +660,7 @@ class StableDiffusionAdapterPipelineSlowTests(unittest.TestCase):
def test_stable_diffusion_adapter_canny_sd_v15(self):
adapter_model = "TencentARC/t2iadapter_canny_sd15v2"
sd_model = "stable-diffusion-v1-5/stable-diffusion-v1-5"
sd_model = "Jiali/stable-diffusion-1.5"
prompt = "toy"
image_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/toy_canny.png"
input_channels = 1
@@ -688,7 +688,7 @@ class StableDiffusionAdapterPipelineSlowTests(unittest.TestCase):
def test_stable_diffusion_adapter_sketch_sd15(self):
adapter_model = "TencentARC/t2iadapter_sketch_sd15v2"
sd_model = "stable-diffusion-v1-5/stable-diffusion-v1-5"
sd_model = "Jiali/stable-diffusion-1.5"
prompt = "cat"
image_url = (
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/edge.png"
@@ -277,9 +277,7 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
torch.cuda.empty_cache()
def test_harm_safe_stable_diffusion(self):
sd_pipe = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
)
sd_pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None)
sd_pipe.scheduler = LMSDiscreteScheduler.from_config(sd_pipe.scheduler.config)
sd_pipe = sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
@@ -340,9 +338,7 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_nudity_safe_stable_diffusion(self):
sd_pipe = StableDiffusionPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
)
sd_pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None)
sd_pipe.scheduler = LMSDiscreteScheduler.from_config(sd_pipe.scheduler.config)
sd_pipe = sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
@@ -396,7 +392,7 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_nudity_safetychecker_safe_stable_diffusion(self):
sd_pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
sd_pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
sd_pipe = sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
+2 -2
View File
@@ -40,7 +40,7 @@ from diffusers.utils.testing_utils import slow
PRETRAINED_MODEL_REPO_MAPPING = OrderedDict(
[
("stable-diffusion", "stable-diffusion-v1-5/stable-diffusion-v1-5"),
("stable-diffusion", "Jiali/stable-diffusion-1.5"),
("if", "DeepFloyd/IF-I-XL-v1.0"),
("kandinsky", "kandinsky-community/kandinsky-2-1"),
("kandinsky22", "kandinsky-community/kandinsky-2-2-decoder"),
@@ -539,7 +539,7 @@ class AutoPipelineIntegrationTest(unittest.TestCase):
def test_controlnet(self):
# test from_pretrained
model_repo = "stable-diffusion-v1-5/stable-diffusion-v1-5"
model_repo = "Jiali/stable-diffusion-1.5"
controlnet_repo = "lllyasviel/sd-controlnet-canny"
controlnet = ControlNetModel.from_pretrained(controlnet_repo, torch_dtype=torch.float16)
@@ -40,7 +40,7 @@ class TextToVideoZeroPipelineSlowTests(unittest.TestCase):
torch.cuda.empty_cache()
def test_full_model(self):
model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
model_id = "Jiali/stable-diffusion-1.5"
pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
generator = torch.Generator(device="cuda").manual_seed(0)
@@ -30,9 +30,7 @@ enable_full_determinism()
@require_torch_gpu
class StableDiffusionControlNetPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin):
pipeline_class = StableDiffusionControlNetPipeline
ckpt_path = (
"https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
)
ckpt_path = "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
original_config = (
"https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml"
)
@@ -31,7 +31,7 @@ class StableDiffusionControlNetInpaintPipelineSingleFileSlowTests(unittest.TestC
pipeline_class = StableDiffusionControlNetInpaintPipeline
ckpt_path = "https://huggingface.co/botp/stable-diffusion-v1-5-inpainting/blob/main/sd-v1-5-inpainting.ckpt"
original_config = "https://raw.githubusercontent.com/runwayml/stable-diffusion/main/configs/stable-diffusion/v1-inpainting-inference.yaml"
repo_id = "stable-diffusion-v1-5/stable-diffusion-inpainting"
repo_id = "botp/stable-diffusion-v1-5-inpainting"
def setUp(self):
super().setUp()
@@ -29,13 +29,11 @@ enable_full_determinism()
@require_torch_gpu
class StableDiffusionControlNetPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin):
pipeline_class = StableDiffusionControlNetPipeline
ckpt_path = (
"https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
)
ckpt_path = "https://huggingface.co/Jiali/stable-diffusion-1.5/blob/main/v1-5-pruned-emaonly.safetensors"
original_config = (
"https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml"
)
repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
repo_id = "Jiali/stable-diffusion-1.5"
def setUp(self):
super().setUp()
@@ -23,13 +23,11 @@ enable_full_determinism()
@require_torch_gpu
class StableDiffusionImg2ImgPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin):
pipeline_class = StableDiffusionImg2ImgPipeline
ckpt_path = (
"https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
)
ckpt_path = "https://huggingface.co/Jiali/stable-diffusion-1.5/blob/main/v1-5-pruned-emaonly.safetensors"
original_config = (
"https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml"
)
repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
repo_id = "Jiali/stable-diffusion-1.5"
def setUp(self):
super().setUp()
@@ -63,7 +63,7 @@ class StableDiffusionInpaintPipelineSingleFileSlowTests(unittest.TestCase, SDSin
def test_single_file_loading_4_channel_unet(self):
# Test loading single file inpaint with a 4 channel UNet
ckpt_path = "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
ckpt_path = "https://huggingface.co/Jiali/stable-diffusion-1.5/blob/main/v1-5-pruned-emaonly.safetensors"
pipe = self.pipeline_class.from_single_file(ckpt_path)
assert pipe.unet.config.in_channels == 4
@@ -26,13 +26,11 @@ enable_full_determinism()
@require_torch_gpu
class StableDiffusionPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin):
pipeline_class = StableDiffusionPipeline
ckpt_path = (
"https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
)
ckpt_path = "https://huggingface.co/Jiali/stable-diffusion-1.5/blob/main/v1-5-pruned-emaonly.safetensors"
original_config = (
"https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml"
)
repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
repo_id = "Jiali/stable-diffusion-1.5"
def setUp(self):
super().setUp()