Compare commits
1 Commits
fix-sf
..
dora-fixes
| Author | SHA1 | Date | |
|---|---|---|---|
| 32b1a6fab4 |
@@ -7,7 +7,6 @@ on:
|
||||
|
||||
env:
|
||||
DIFFUSERS_IS_CI: yes
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 1
|
||||
HF_HOME: /mnt/cache
|
||||
OMP_NUM_THREADS: 8
|
||||
MKL_NUM_THREADS: 8
|
||||
|
||||
@@ -22,7 +22,6 @@ concurrency:
|
||||
|
||||
env:
|
||||
DIFFUSERS_IS_CI: yes
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 1
|
||||
OMP_NUM_THREADS: 4
|
||||
MKL_NUM_THREADS: 4
|
||||
PYTEST_TIMEOUT: 60
|
||||
|
||||
@@ -14,7 +14,6 @@ env:
|
||||
DIFFUSERS_IS_CI: yes
|
||||
OMP_NUM_THREADS: 8
|
||||
MKL_NUM_THREADS: 8
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 1
|
||||
PYTEST_TIMEOUT: 600
|
||||
PIPELINE_USAGE_CUTOFF: 50000
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@ env:
|
||||
HF_HOME: /mnt/cache
|
||||
OMP_NUM_THREADS: 8
|
||||
MKL_NUM_THREADS: 8
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 1
|
||||
PYTEST_TIMEOUT: 600
|
||||
RUN_SLOW: no
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@ env:
|
||||
HF_HOME: /mnt/cache
|
||||
OMP_NUM_THREADS: 8
|
||||
MKL_NUM_THREADS: 8
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 1
|
||||
PYTEST_TIMEOUT: 600
|
||||
RUN_SLOW: no
|
||||
|
||||
|
||||
@@ -43,7 +43,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
|
||||
numpy==1.26.4 \
|
||||
scipy \
|
||||
tensorboard \
|
||||
transformers \
|
||||
hf_transfer
|
||||
transformers
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
@@ -45,7 +45,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
|
||||
numpy==1.26.4 \
|
||||
scipy \
|
||||
tensorboard \
|
||||
transformers \
|
||||
hf_transfer
|
||||
transformers
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
@@ -43,7 +43,6 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
|
||||
numpy==1.26.4 \
|
||||
scipy \
|
||||
tensorboard \
|
||||
transformers \
|
||||
hf_transfer
|
||||
transformers
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
@@ -44,7 +44,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
|
||||
numpy==1.26.4 \
|
||||
scipy \
|
||||
tensorboard \
|
||||
transformers \
|
||||
hf_transfer
|
||||
transformers
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
@@ -44,7 +44,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
|
||||
numpy==1.26.4 \
|
||||
scipy \
|
||||
tensorboard \
|
||||
transformers \
|
||||
hf_transfer
|
||||
transformers
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
|
||||
@@ -44,7 +44,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
|
||||
numpy==1.26.4 \
|
||||
scipy \
|
||||
tensorboard \
|
||||
transformers matplotlib \
|
||||
hf_transfer
|
||||
transformers matplotlib
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
|
||||
@@ -45,7 +45,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
|
||||
scipy \
|
||||
tensorboard \
|
||||
transformers \
|
||||
pytorch-lightning \
|
||||
hf_transfer
|
||||
pytorch-lightning
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
|
||||
@@ -45,7 +45,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
|
||||
scipy \
|
||||
tensorboard \
|
||||
transformers \
|
||||
xformers \
|
||||
hf_transfer
|
||||
xformers
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
|
||||
@@ -56,7 +56,7 @@
|
||||
- local: using-diffusers/overview_techniques
|
||||
title: Overview
|
||||
- local: training/distributed_inference
|
||||
title: Distributed inference
|
||||
title: Distributed inference with multiple GPUs
|
||||
- local: using-diffusers/merge_loras
|
||||
title: Merge LoRAs
|
||||
- local: using-diffusers/scheduler_features
|
||||
|
||||
@@ -40,7 +40,6 @@ To generate a video from prompt, run the following Python code:
|
||||
```python
|
||||
import torch
|
||||
from diffusers import TextToVideoZeroPipeline
|
||||
import imageio
|
||||
|
||||
model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
|
||||
|
||||
@@ -45,14 +45,6 @@ Many schedulers are implemented from the [k-diffusion](https://github.com/crowso
|
||||
| N/A | [`DEISMultistepScheduler`] | |
|
||||
| N/A | [`UniPCMultistepScheduler`] | |
|
||||
|
||||
## Noise schedules and schedule types
|
||||
| A1111/k-diffusion | 🤗 Diffusers |
|
||||
|--------------------------|----------------------------------------------------------------------------|
|
||||
| Karras | init with `use_karras_sigmas=True` |
|
||||
| sgm_uniform | init with `timestep_spacing="trailing"` |
|
||||
| simple | init with `timestep_spacing="trailing"` |
|
||||
| exponential | init with `timestep_spacing="linspace"`, `use_exponential_sigmas=True` |
|
||||
|
||||
All schedulers are built from the base [`SchedulerMixin`] class which implements low level utilities shared by all schedulers.
|
||||
|
||||
## SchedulerMixin
|
||||
|
||||
@@ -75,8 +75,4 @@ Happy exploring, and thank you for being part of the Diffusers community!
|
||||
<td><a href="https://github.com/cumulo-autumn/StreamDiffusion"> StreamDiffusion </a></td>
|
||||
<td>A Pipeline-Level Solution for Real-Time Interactive Generation</td>
|
||||
</tr>
|
||||
<tr style="border-top: 2px solid black">
|
||||
<td><a href="https://github.com/Netwrck/stable-diffusion-server"> Stable Diffusion Server </a></td>
|
||||
<td>A server configured for Inpainting/Generation/img2img with one stable diffusion model</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
@@ -95,7 +95,7 @@ print(f"Model downloaded at {model_path}")
|
||||
Once you have downloaded a snapshot of the model, you can test it using Apple's Python script.
|
||||
|
||||
```shell
|
||||
python -m python_coreml_stable_diffusion.pipeline --prompt "a photo of an astronaut riding a horse on mars" -i ./models/coreml-stable-diffusion-v1-4_original_packages/original/packages -o </path/to/output/image> --compute-unit CPU_AND_GPU --seed 93
|
||||
python -m python_coreml_stable_diffusion.pipeline --prompt "a photo of an astronaut riding a horse on mars" -i models/coreml-stable-diffusion-v1-4_original_packages -o </path/to/output/image> --compute-unit CPU_AND_GPU --seed 93
|
||||
```
|
||||
|
||||
Pass the path of the downloaded checkpoint to the script with the `-i` flag. `--compute-unit` indicates the hardware you want to allow for inference. It must be one of the following options: `ALL`, `CPU_AND_GPU`, `CPU_ONLY`, `CPU_AND_NE`. You may also provide an optional output path, and a seed for reproducibility.
|
||||
|
||||
@@ -10,7 +10,7 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
|
||||
specific language governing permissions and limitations under the License.
|
||||
-->
|
||||
|
||||
# Distributed inference
|
||||
# Distributed inference with multiple GPUs
|
||||
|
||||
On distributed setups, you can run inference across multiple GPUs with 🤗 [Accelerate](https://huggingface.co/docs/accelerate/index) or [PyTorch Distributed](https://pytorch.org/tutorials/beginner/dist_overview.html), which is useful for generating with multiple prompts in parallel.
|
||||
|
||||
@@ -109,131 +109,3 @@ torchrun run_distributed.py --nproc_per_node=2
|
||||
|
||||
> [!TIP]
|
||||
> You can use `device_map` within a [`DiffusionPipeline`] to distribute its model-level components on multiple devices. Refer to the [Device placement](../tutorials/inference_with_big_models#device-placement) guide to learn more.
|
||||
|
||||
## Model sharding
|
||||
|
||||
Modern diffusion systems such as [Flux](../api/pipelines/flux) are very large and have multiple models. For example, [Flux.1-Dev](https://hf.co/black-forest-labs/FLUX.1-dev) is made up of two text encoders - [T5-XXL](https://hf.co/google/t5-v1_1-xxl) and [CLIP-L](https://hf.co/openai/clip-vit-large-patch14) - a [diffusion transformer](../api/models/flux_transformer), and a [VAE](../api/models/autoencoderkl). With a model this size, it can be challenging to run inference on consumer GPUs.
|
||||
|
||||
Model sharding is a technique that distributes models across GPUs when the models don't fit on a single GPU. The example below assumes two 16GB GPUs are available for inference.
|
||||
|
||||
Start by computing the text embeddings with the text encoders. Keep the text encoders on two GPUs by setting `device_map="balanced"`. The `balanced` strategy evenly distributes the model on all available GPUs. Use the `max_memory` parameter to allocate the maximum amount of memory for each text encoder on each GPU.
|
||||
|
||||
> [!TIP]
|
||||
> **Only** load the text encoders for this step! The diffusion transformer and VAE are loaded in a later step to preserve memory.
|
||||
|
||||
```py
|
||||
from diffusers import FluxPipeline
|
||||
import torch
|
||||
|
||||
prompt = "a photo of a dog with cat-like look"
|
||||
|
||||
pipeline = FluxPipeline.from_pretrained(
|
||||
"black-forest-labs/FLUX.1-dev",
|
||||
transformer=None,
|
||||
vae=None,
|
||||
device_map="balanced",
|
||||
max_memory={0: "16GB", 1: "16GB"},
|
||||
torch_dtype=torch.bfloat16
|
||||
)
|
||||
with torch.no_grad():
|
||||
print("Encoding prompts.")
|
||||
prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt(
|
||||
prompt=prompt, prompt_2=None, max_sequence_length=512
|
||||
)
|
||||
```
|
||||
|
||||
Once the text embeddings are computed, remove the text encoders and tokenizers from the GPU to make space for the diffusion transformer.
|
||||
|
||||
```py
|
||||
import gc
|
||||
|
||||
def flush():
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
torch.cuda.reset_max_memory_allocated()
|
||||
torch.cuda.reset_peak_memory_stats()
|
||||
|
||||
del pipeline.text_encoder
|
||||
del pipeline.text_encoder_2
|
||||
del pipeline.tokenizer
|
||||
del pipeline.tokenizer_2
|
||||
del pipeline
|
||||
|
||||
flush()
|
||||
```
|
||||
|
||||
Next, load the diffusion transformer, which has 12.5B parameters. This time, set `device_map="auto"` to automatically distribute the model across two 16GB GPUs. The `auto` strategy is backed by [Accelerate](https://hf.co/docs/accelerate/index) and available as a part of the [Big Model Inference](https://hf.co/docs/accelerate/concept_guides/big_model_inference) feature. It starts by distributing a model across the fastest device first (GPU) before moving to slower devices like the CPU and hard drive if needed. The trade-off of storing model parameters on slower devices is higher inference latency.
|
||||
|
||||
```py
|
||||
from diffusers import FluxTransformer2DModel
|
||||
import torch
|
||||
|
||||
transformer = FluxTransformer2DModel.from_pretrained(
|
||||
"black-forest-labs/FLUX.1-dev",
|
||||
subfolder="transformer",
|
||||
device_map="auto",
|
||||
torch_dtype=torch.bfloat16
|
||||
)
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> At any point, you can try `print(pipeline.hf_device_map)` to see how the various models are distributed across devices. This is useful for tracking the device placement of the models.
|
||||
|
||||
Add the transformer model to the pipeline for denoising, but set the other model-level components like the text encoders and VAE to `None` because you don't need them yet.
|
||||
|
||||
```py
|
||||
pipeline = FluxPipeline.from_pretrained(
|
||||
"black-forest-labs/FLUX.1-dev",
|
||||
text_encoder=None,
|
||||
text_encoder_2=None,
|
||||
tokenizer=None,
|
||||
tokenizer_2=None,
|
||||
vae=None,
|
||||
transformer=transformer,
|
||||
torch_dtype=torch.bfloat16
|
||||
)
|
||||
|
||||
print("Running denoising.")
|
||||
height, width = 768, 1360
|
||||
latents = pipeline(
|
||||
prompt_embeds=prompt_embeds,
|
||||
pooled_prompt_embeds=pooled_prompt_embeds,
|
||||
num_inference_steps=50,
|
||||
guidance_scale=3.5,
|
||||
height=height,
|
||||
width=width,
|
||||
output_type="latent",
|
||||
).images
|
||||
```
|
||||
|
||||
Remove the pipeline and transformer from memory as they're no longer needed.
|
||||
|
||||
```py
|
||||
del pipeline.transformer
|
||||
del pipeline
|
||||
|
||||
flush()
|
||||
```
|
||||
|
||||
Finally, decode the latents with the VAE into an image. The VAE is typically small enough to be loaded on a single GPU.
|
||||
|
||||
```py
|
||||
from diffusers import AutoencoderKL
|
||||
from diffusers.image_processor import VaeImageProcessor
|
||||
import torch
|
||||
|
||||
vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=torch.bfloat16).to("cuda")
|
||||
vae_scale_factor = 2 ** (len(vae.config.block_out_channels))
|
||||
image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)
|
||||
|
||||
with torch.no_grad():
|
||||
print("Running decoding.")
|
||||
latents = FluxPipeline._unpack_latents(latents, height, width, vae_scale_factor)
|
||||
latents = (latents / vae.config.scaling_factor) + vae.config.shift_factor
|
||||
|
||||
image = vae.decode(latents, return_dict=False)[0]
|
||||
image = image_processor.postprocess(image, output_type="pil")
|
||||
image[0].save("split_transformer.png")
|
||||
```
|
||||
|
||||
By selectively loading and unloading the models you need at a given stage and sharding the largest models across multiple GPUs, it is possible to run inference with large models on consumer GPUs.
|
||||
|
||||
@@ -171,13 +171,14 @@ def latents_to_rgb(latents):
|
||||
weights = (
|
||||
(60, -60, 25, -70),
|
||||
(60, -5, 15, -50),
|
||||
(60, 10, -5, -35),
|
||||
(60, 10, -5, -35)
|
||||
)
|
||||
|
||||
weights_tensor = torch.t(torch.tensor(weights, dtype=latents.dtype).to(latents.device))
|
||||
biases_tensor = torch.tensor((150, 140, 130), dtype=latents.dtype).to(latents.device)
|
||||
rgb_tensor = torch.einsum("...lxy,lr -> ...rxy", latents, weights_tensor) + biases_tensor.unsqueeze(-1).unsqueeze(-1)
|
||||
image_array = rgb_tensor.clamp(0, 255).byte().cpu().numpy().transpose(1, 2, 0)
|
||||
image_array = rgb_tensor.clamp(0, 255)[0].byte().cpu().numpy()
|
||||
image_array = image_array.transpose(1, 2, 0)
|
||||
|
||||
return Image.fromarray(image_array)
|
||||
```
|
||||
@@ -188,7 +189,7 @@ def latents_to_rgb(latents):
|
||||
def decode_tensors(pipe, step, timestep, callback_kwargs):
|
||||
latents = callback_kwargs["latents"]
|
||||
|
||||
image = latents_to_rgb(latents[0])
|
||||
image = latents_to_rgb(latents)
|
||||
image.save(f"{step}.png")
|
||||
|
||||
return callback_kwargs
|
||||
|
||||
@@ -10,7 +10,6 @@ Please also check out our [Community Scripts](https://github.com/huggingface/dif
|
||||
|
||||
| Example | Description | Code Example | Colab | Author |
|
||||
|:--------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------:|
|
||||
|Flux with CFG|[Flux with CFG](https://github.com/ToTheBeginning/PuLID/blob/main/docs/pulid_for_flux.md) provides an implementation of using CFG in [Flux](https://blackforestlabs.ai/announcing-black-forest-labs/).|[Flux with CFG](#flux-with-cfg)|NA|[Linoy Tsaban](https://github.com/linoytsaban), [Apolinário](https://github.com/apolinario), and [Sayak Paul](https://github.com/sayakpaul)|
|
||||
|Differential Diffusion|[Differential Diffusion](https://github.com/exx8/differential-diffusion) modifies an image according to a text prompt, and according to a map that specifies the amount of change in each region.|[Differential Diffusion](#differential-diffusion)|[](https://huggingface.co/spaces/exx8/differential-diffusion) [](https://colab.research.google.com/github/exx8/differential-diffusion/blob/main/examples/SD2.ipynb)|[Eran Levin](https://github.com/exx8) and [Ohad Fried](https://www.ohadf.com/)|
|
||||
| HD-Painter | [HD-Painter](https://github.com/Picsart-AI-Research/HD-Painter) enables prompt-faithful and high-resolution (up to 2k) image inpainting upon any diffusion-based image inpainting method. | [HD-Painter](#hd-painter) | [](https://huggingface.co/spaces/PAIR/HD-Painter) | [Manukyan Hayk](https://github.com/haikmanukyan) and [Sargsyan Andranik](https://github.com/AndranikSargsyan) |
|
||||
| Marigold Monocular Depth Estimation | A universal monocular depth estimator, utilizing Stable Diffusion, delivering sharp predictions in the wild. (See the [project page](https://marigoldmonodepth.github.io) and [full codebase](https://github.com/prs-eth/marigold) for more details.) | [Marigold Depth Estimation](#marigold-depth-estimation) | [](https://huggingface.co/spaces/toshas/marigold) [](https://colab.research.google.com/drive/12G8reD13DdpMie5ZQlaFNo2WCGeNUH-u?usp=sharing) | [Bingxin Ke](https://github.com/markkua) and [Anton Obukhov](https://github.com/toshas) |
|
||||
@@ -83,36 +82,6 @@ pipe = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion
|
||||
|
||||
## Example usages
|
||||
|
||||
### Flux with CFG
|
||||
|
||||
Learn more about Flux [here](https://blackforestlabs.ai/announcing-black-forest-labs/). Since Flux doesn't use CFG by default, this implementation adds it, inspired by the [PuLID Flux adaptation](https://github.com/ToTheBeginning/PuLID/blob/main/docs/pulid_for_flux.md).
|
||||
|
||||
Example usage:
|
||||
|
||||
```py
|
||||
from diffusers import DiffusionPipeline
|
||||
import torch
|
||||
|
||||
pipeline = DiffusionPipeline.from_pretrained(
|
||||
"black-forest-labs/FLUX.1-dev",
|
||||
torch_dtype=torch.bfloat16,
|
||||
custom_pipeline="pipeline_flux_with_cfg"
|
||||
)
|
||||
pipeline.enable_model_cpu_offload()
|
||||
prompt = "a watercolor painting of a unicorn"
|
||||
negative_prompt = "pink"
|
||||
|
||||
img = pipeline(
|
||||
prompt=prompt,
|
||||
negative_prompt=negative_prompt,
|
||||
true_cfg=1.5,
|
||||
guidance_scale=3.5,
|
||||
num_images_per_prompt=1,
|
||||
generator=torch.manual_seed(0)
|
||||
).images[0]
|
||||
img.save("cfg_flux.png")
|
||||
```
|
||||
|
||||
### Differential Diffusion
|
||||
|
||||
**Eran Levin, Ohad Fried**
|
||||
|
||||
@@ -289,104 +289,80 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
|
||||
self,
|
||||
prompt: Union[str, List[str]],
|
||||
prompt_2: Union[str, List[str]],
|
||||
negative_prompt: Union[str, List[str]] = None,
|
||||
negative_prompt_2: Union[str, List[str]] = None,
|
||||
device: Optional[torch.device] = None,
|
||||
num_images_per_prompt: int = 1,
|
||||
prompt_embeds: Optional[torch.FloatTensor] = None,
|
||||
pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
|
||||
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
||||
negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
|
||||
max_sequence_length: int = 512,
|
||||
lora_scale: Optional[float] = None,
|
||||
do_true_cfg: bool = False,
|
||||
):
|
||||
r"""
|
||||
|
||||
Args:
|
||||
prompt (`str` or `List[str]`, *optional*):
|
||||
prompt to be encoded
|
||||
prompt_2 (`str` or `List[str]`, *optional*):
|
||||
The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
|
||||
used in all text-encoders
|
||||
device: (`torch.device`):
|
||||
torch device
|
||||
num_images_per_prompt (`int`):
|
||||
number of images that should be generated per prompt
|
||||
prompt_embeds (`torch.FloatTensor`, *optional*):
|
||||
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
||||
provided, text embeddings will be generated from `prompt` input argument.
|
||||
pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
|
||||
Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
|
||||
If not provided, pooled text embeddings will be generated from `prompt` input argument.
|
||||
lora_scale (`float`, *optional*):
|
||||
A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
|
||||
"""
|
||||
device = device or self._execution_device
|
||||
|
||||
# Set LoRA scale if applicable
|
||||
# set lora scale so that monkey patched LoRA
|
||||
# function of text encoder can correctly access it
|
||||
if lora_scale is not None and isinstance(self, FluxLoraLoaderMixin):
|
||||
self._lora_scale = lora_scale
|
||||
|
||||
# dynamically adjust the LoRA scale
|
||||
if self.text_encoder is not None and USE_PEFT_BACKEND:
|
||||
scale_lora_layers(self.text_encoder, lora_scale)
|
||||
if self.text_encoder_2 is not None and USE_PEFT_BACKEND:
|
||||
scale_lora_layers(self.text_encoder_2, lora_scale)
|
||||
|
||||
prompt = [prompt] if isinstance(prompt, str) else prompt
|
||||
batch_size = len(prompt)
|
||||
|
||||
if do_true_cfg and negative_prompt is not None:
|
||||
negative_prompt = [negative_prompt] if isinstance(negative_prompt, str) else negative_prompt
|
||||
negative_batch_size = len(negative_prompt)
|
||||
|
||||
if negative_batch_size != batch_size:
|
||||
raise ValueError(
|
||||
f"Negative prompt batch size ({negative_batch_size}) does not match prompt batch size ({batch_size})"
|
||||
)
|
||||
|
||||
# Concatenate prompts
|
||||
prompts = prompt + negative_prompt
|
||||
prompts_2 = (
|
||||
prompt_2 + negative_prompt_2 if prompt_2 is not None and negative_prompt_2 is not None else None
|
||||
)
|
||||
else:
|
||||
prompts = prompt
|
||||
prompts_2 = prompt_2
|
||||
|
||||
if prompt_embeds is None:
|
||||
if prompts_2 is None:
|
||||
prompts_2 = prompts
|
||||
prompt_2 = prompt_2 or prompt
|
||||
prompt_2 = [prompt_2] if isinstance(prompt_2, str) else prompt_2
|
||||
|
||||
# Get pooled prompt embeddings from CLIPTextModel
|
||||
# We only use the pooled prompt output from the CLIPTextModel
|
||||
pooled_prompt_embeds = self._get_clip_prompt_embeds(
|
||||
prompt=prompts,
|
||||
prompt=prompt,
|
||||
device=device,
|
||||
num_images_per_prompt=num_images_per_prompt,
|
||||
)
|
||||
prompt_embeds = self._get_t5_prompt_embeds(
|
||||
prompt=prompts_2,
|
||||
prompt=prompt_2,
|
||||
num_images_per_prompt=num_images_per_prompt,
|
||||
max_sequence_length=max_sequence_length,
|
||||
device=device,
|
||||
)
|
||||
|
||||
if do_true_cfg and negative_prompt is not None:
|
||||
# Split embeddings back into positive and negative parts
|
||||
total_batch_size = batch_size * num_images_per_prompt
|
||||
positive_indices = slice(0, total_batch_size)
|
||||
negative_indices = slice(total_batch_size, 2 * total_batch_size)
|
||||
|
||||
positive_pooled_prompt_embeds = pooled_prompt_embeds[positive_indices]
|
||||
negative_pooled_prompt_embeds = pooled_prompt_embeds[negative_indices]
|
||||
|
||||
positive_prompt_embeds = prompt_embeds[positive_indices]
|
||||
negative_prompt_embeds = prompt_embeds[negative_indices]
|
||||
|
||||
pooled_prompt_embeds = positive_pooled_prompt_embeds
|
||||
prompt_embeds = positive_prompt_embeds
|
||||
|
||||
# Unscale LoRA layers
|
||||
if self.text_encoder is not None:
|
||||
if isinstance(self, FluxLoraLoaderMixin) and USE_PEFT_BACKEND:
|
||||
# Retrieve the original scale by scaling back the LoRA layers
|
||||
unscale_lora_layers(self.text_encoder, lora_scale)
|
||||
|
||||
if self.text_encoder_2 is not None:
|
||||
if isinstance(self, FluxLoraLoaderMixin) and USE_PEFT_BACKEND:
|
||||
# Retrieve the original scale by scaling back the LoRA layers
|
||||
unscale_lora_layers(self.text_encoder_2, lora_scale)
|
||||
|
||||
dtype = self.text_encoder.dtype if self.text_encoder is not None else self.transformer.dtype
|
||||
text_ids = torch.zeros(prompt_embeds.shape[1], 3).to(device=device, dtype=dtype)
|
||||
|
||||
if do_true_cfg and negative_prompt is not None:
|
||||
return (
|
||||
prompt_embeds,
|
||||
pooled_prompt_embeds,
|
||||
text_ids,
|
||||
negative_prompt_embeds,
|
||||
negative_pooled_prompt_embeds,
|
||||
)
|
||||
else:
|
||||
return prompt_embeds, pooled_prompt_embeds, text_ids, None, None
|
||||
return prompt_embeds, pooled_prompt_embeds, text_ids
|
||||
|
||||
def check_inputs(
|
||||
self,
|
||||
@@ -711,33 +687,38 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
|
||||
lora_scale = (
|
||||
self.joint_attention_kwargs.get("scale", None) if self.joint_attention_kwargs is not None else None
|
||||
)
|
||||
do_true_cfg = true_cfg > 1 and negative_prompt is not None
|
||||
(
|
||||
prompt_embeds,
|
||||
pooled_prompt_embeds,
|
||||
text_ids,
|
||||
negative_prompt_embeds,
|
||||
negative_pooled_prompt_embeds,
|
||||
) = self.encode_prompt(
|
||||
prompt=prompt,
|
||||
prompt_2=prompt_2,
|
||||
negative_prompt=negative_prompt,
|
||||
negative_prompt_2=negative_prompt_2,
|
||||
prompt_embeds=prompt_embeds,
|
||||
pooled_prompt_embeds=pooled_prompt_embeds,
|
||||
negative_prompt_embeds=negative_prompt_embeds,
|
||||
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
|
||||
device=device,
|
||||
num_images_per_prompt=num_images_per_prompt,
|
||||
max_sequence_length=max_sequence_length,
|
||||
lora_scale=lora_scale,
|
||||
do_true_cfg=do_true_cfg,
|
||||
)
|
||||
|
||||
# perform "real" CFG as suggested for distilled Flux models in https://github.com/ToTheBeginning/PuLID/blob/main/docs/pulid_for_flux.md
|
||||
do_true_cfg = true_cfg > 1 and negative_prompt is not None
|
||||
if do_true_cfg:
|
||||
# Concatenate embeddings
|
||||
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
|
||||
pooled_prompt_embeds = torch.cat([negative_pooled_prompt_embeds, pooled_prompt_embeds], dim=0)
|
||||
(
|
||||
negative_prompt_embeds,
|
||||
negative_pooled_prompt_embeds,
|
||||
negative_text_ids,
|
||||
) = self.encode_prompt(
|
||||
prompt=negative_prompt,
|
||||
prompt_2=negative_prompt_2,
|
||||
prompt_embeds=negative_prompt_embeds,
|
||||
pooled_prompt_embeds=negative_pooled_prompt_embeds,
|
||||
device=device,
|
||||
num_images_per_prompt=num_images_per_prompt,
|
||||
max_sequence_length=max_sequence_length,
|
||||
lora_scale=lora_scale,
|
||||
)
|
||||
|
||||
# 4. Prepare latent variables
|
||||
num_channels_latents = self.transformer.config.in_channels // 4
|
||||
@@ -773,26 +754,24 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
|
||||
num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
|
||||
self._num_timesteps = len(timesteps)
|
||||
|
||||
# handle guidance
|
||||
if self.transformer.config.guidance_embeds:
|
||||
guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32)
|
||||
guidance = guidance.expand(latents.shape[0])
|
||||
else:
|
||||
guidance = None
|
||||
|
||||
# 6. Denoising loop
|
||||
with self.progress_bar(total=num_inference_steps) as progress_bar:
|
||||
for i, t in enumerate(timesteps):
|
||||
if self.interrupt:
|
||||
continue
|
||||
|
||||
latent_model_input = torch.cat([latents] * 2) if do_true_cfg else latents
|
||||
|
||||
# handle guidance
|
||||
if self.transformer.config.guidance_embeds:
|
||||
guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32)
|
||||
guidance = guidance.expand(latent_model_input.shape[0])
|
||||
else:
|
||||
guidance = None
|
||||
|
||||
# broadcast to batch dimension in a way that's compatible with ONNX/Core ML
|
||||
timestep = t.expand(latent_model_input.shape[0]).to(latent_model_input.dtype)
|
||||
timestep = t.expand(latents.shape[0]).to(latents.dtype)
|
||||
|
||||
noise_pred = self.transformer(
|
||||
hidden_states=latent_model_input,
|
||||
hidden_states=latents,
|
||||
timestep=timestep / 1000,
|
||||
guidance=guidance,
|
||||
pooled_projections=pooled_prompt_embeds,
|
||||
@@ -804,7 +783,18 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
|
||||
)[0]
|
||||
|
||||
if do_true_cfg:
|
||||
neg_noise_pred, noise_pred = noise_pred.chunk(2)
|
||||
neg_noise_pred = self.transformer(
|
||||
hidden_states=latents,
|
||||
timestep=timestep / 1000,
|
||||
guidance=guidance,
|
||||
pooled_projections=negative_pooled_prompt_embeds,
|
||||
encoder_hidden_states=negative_prompt_embeds,
|
||||
txt_ids=negative_text_ids,
|
||||
img_ids=latent_image_ids,
|
||||
joint_attention_kwargs=self.joint_attention_kwargs,
|
||||
return_dict=False,
|
||||
)[0]
|
||||
|
||||
noise_pred = neg_noise_pred + true_cfg * (noise_pred - neg_noise_pred)
|
||||
|
||||
# compute the previous noisy sample x_t -> x_t-1
|
||||
|
||||
@@ -31,7 +31,7 @@ import torch.utils.checkpoint
|
||||
import transformers
|
||||
from accelerate import Accelerator
|
||||
from accelerate.logging import get_logger
|
||||
from accelerate.utils import DistributedDataParallelKwargs, ProjectConfiguration, set_seed
|
||||
from accelerate.utils import ProjectConfiguration, set_seed
|
||||
from datasets import load_dataset
|
||||
from huggingface_hub import create_repo, upload_folder
|
||||
from packaging import version
|
||||
@@ -899,13 +899,12 @@ def main(args):
|
||||
logging_dir = Path(args.output_dir, args.logging_dir)
|
||||
|
||||
accelerator_project_config = ProjectConfiguration(project_dir=args.output_dir, logging_dir=logging_dir)
|
||||
kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
|
||||
|
||||
accelerator = Accelerator(
|
||||
gradient_accumulation_steps=args.gradient_accumulation_steps,
|
||||
mixed_precision=args.mixed_precision,
|
||||
log_with=args.report_to,
|
||||
project_config=accelerator_project_config,
|
||||
kwargs_handlers=[kwargs],
|
||||
)
|
||||
|
||||
# Disable AMP for MPS.
|
||||
|
||||
@@ -153,12 +153,9 @@ def _convert_non_diffusers_lora_to_diffusers(state_dict, unet_name="unet", text_
|
||||
"You need `peft` 0.9.0 at least to use DoRA-enabled LoRAs. Please upgrade your installation of `peft`."
|
||||
)
|
||||
|
||||
# Iterate over all LoRA weights.
|
||||
all_lora_keys = list(state_dict.keys())
|
||||
for key in all_lora_keys:
|
||||
if not key.endswith("lora_down.weight"):
|
||||
continue
|
||||
|
||||
# every down weight has a corresponding up weight and potentially an alpha weight
|
||||
lora_keys = [k for k in state_dict.keys() if k.endswith("lora_down.weight")]
|
||||
for key in lora_keys:
|
||||
# Extract LoRA name.
|
||||
lora_name = key.split(".")[0]
|
||||
|
||||
@@ -177,9 +174,12 @@ def _convert_non_diffusers_lora_to_diffusers(state_dict, unet_name="unet", text_
|
||||
# Store DoRA scale if present.
|
||||
if dora_present_in_unet:
|
||||
dora_scale_key_to_replace = "_lora.down." if "_lora.down." in diffusers_name else ".lora.down."
|
||||
unet_state_dict[
|
||||
diffusers_name.replace(dora_scale_key_to_replace, ".lora_magnitude_vector.")
|
||||
] = state_dict.pop(key.replace("lora_down.weight", "dora_scale"))
|
||||
new_key = diffusers_name.replace(dora_scale_key_to_replace, ".lora_magnitude_vector.")
|
||||
# dora_weight = state_dict.pop(key.replace("lora_down.weight", "dora_scale"))
|
||||
dora_weight = state_dict.pop(lora_name + ".dora_scale")
|
||||
if dora_weight.dim() <= 2:
|
||||
dora_weight = dora_weight.squeeze()
|
||||
unet_state_dict[new_key] = dora_weight
|
||||
|
||||
# Handle text encoder LoRAs.
|
||||
elif lora_name.startswith(("lora_te_", "lora_te1_", "lora_te2_")):
|
||||
@@ -194,18 +194,24 @@ def _convert_non_diffusers_lora_to_diffusers(state_dict, unet_name="unet", text_
|
||||
te2_state_dict[diffusers_name.replace(".down.", ".up.")] = state_dict.pop(lora_name_up)
|
||||
|
||||
# Store DoRA scale if present.
|
||||
if dora_present_in_te or dora_present_in_te2:
|
||||
if (dora_present_in_te or dora_present_in_te2):
|
||||
dora_scale_key_to_replace_te = (
|
||||
"_lora.down." if "_lora.down." in diffusers_name else ".lora_linear_layer."
|
||||
)
|
||||
if lora_name.startswith(("lora_te_", "lora_te1_")):
|
||||
te_state_dict[
|
||||
diffusers_name.replace(dora_scale_key_to_replace_te, ".lora_magnitude_vector.")
|
||||
] = state_dict.pop(key.replace("lora_down.weight", "dora_scale"))
|
||||
new_key = diffusers_name.replace(dora_scale_key_to_replace_te, ".lora_magnitude_vector.")
|
||||
# dora_weight = state_dict.pop(key.replace("lora_down.weight", "dora_scale"))
|
||||
dora_weight = state_dict.pop(lora_name + ".dora_scale")
|
||||
if dora_weight.dim() <= 2:
|
||||
dora_weight = dora_weight.squeeze()
|
||||
te_state_dict[new_key] = dora_weight
|
||||
elif lora_name.startswith("lora_te2_"):
|
||||
te2_state_dict[
|
||||
diffusers_name.replace(dora_scale_key_to_replace_te, ".lora_magnitude_vector.")
|
||||
] = state_dict.pop(key.replace("lora_down.weight", "dora_scale"))
|
||||
new_key = diffusers_name.replace(dora_scale_key_to_replace_te, ".lora_magnitude_vector.")
|
||||
# dora_weight = state_dict.pop(key.replace("lora_down.weight", "dora_scale"))
|
||||
dora_weight = state_dict.pop(lora_name + ".dora_scale")
|
||||
if dora_weight.dim() <= 2:
|
||||
dora_weight = dora_weight.squeeze()
|
||||
te2_state_dict[new_key] = dora_weight
|
||||
|
||||
# Store alpha if present.
|
||||
if lora_name_alpha in state_dict:
|
||||
@@ -214,7 +220,8 @@ def _convert_non_diffusers_lora_to_diffusers(state_dict, unet_name="unet", text_
|
||||
|
||||
# Check if any keys remain.
|
||||
if len(state_dict) > 0:
|
||||
raise ValueError(f"The following keys have not been correctly renamed: \n\n {', '.join(state_dict.keys())}")
|
||||
all_keys_remaining = sorted(list(state_dict.keys()))
|
||||
raise ValueError(f"The following keys have not been correctly renamed: \n\n {', '.join(all_keys_remaining)}")
|
||||
|
||||
logger.info("Non-diffusers checkpoint detected.")
|
||||
|
||||
@@ -285,7 +292,7 @@ def _convert_unet_lora_key(key):
|
||||
pass
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
return diffusers_name
|
||||
|
||||
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
# limitations under the License.
|
||||
"""Conversion script for the Stable Diffusion checkpoints."""
|
||||
|
||||
import copy
|
||||
import os
|
||||
import re
|
||||
from contextlib import nullcontext
|
||||
@@ -92,11 +91,11 @@ DIFFUSERS_DEFAULT_PIPELINE_PATHS = {
|
||||
"xl_inpaint": {"pretrained_model_name_or_path": "diffusers/stable-diffusion-xl-1.0-inpainting-0.1"},
|
||||
"playground-v2-5": {"pretrained_model_name_or_path": "playgroundai/playground-v2.5-1024px-aesthetic"},
|
||||
"upscale": {"pretrained_model_name_or_path": "stabilityai/stable-diffusion-x4-upscaler"},
|
||||
"inpainting": {"pretrained_model_name_or_path": "stable-diffusion-v1-5/stable-diffusion-inpainting"},
|
||||
"inpainting": {"pretrained_model_name_or_path": "Lykon/dreamshaper-8-inpainting"},
|
||||
"inpainting_v2": {"pretrained_model_name_or_path": "stabilityai/stable-diffusion-2-inpainting"},
|
||||
"controlnet": {"pretrained_model_name_or_path": "lllyasviel/control_v11p_sd15_canny"},
|
||||
"v2": {"pretrained_model_name_or_path": "stabilityai/stable-diffusion-2-1"},
|
||||
"v1": {"pretrained_model_name_or_path": "stable-diffusion-v1-5/stable-diffusion-v1-5"},
|
||||
"v1": {"pretrained_model_name_or_path": "Lykon/dreamshaper-8"},
|
||||
"stable_cascade_stage_b": {"pretrained_model_name_or_path": "stabilityai/stable-cascade", "subfolder": "decoder"},
|
||||
"stable_cascade_stage_b_lite": {
|
||||
"pretrained_model_name_or_path": "stabilityai/stable-cascade",
|
||||
@@ -542,7 +541,6 @@ def infer_diffusers_model_type(checkpoint):
|
||||
def fetch_diffusers_config(checkpoint):
|
||||
model_type = infer_diffusers_model_type(checkpoint)
|
||||
model_path = DIFFUSERS_DEFAULT_PIPELINE_PATHS[model_type]
|
||||
model_path = copy.deepcopy(model_path)
|
||||
|
||||
return model_path
|
||||
|
||||
@@ -1513,7 +1511,7 @@ def _legacy_load_scheduler(
|
||||
)
|
||||
deprecate("prediction_type", "1.0.0", deprecation_message)
|
||||
|
||||
scheduler_config = copy.deepcopy(SCHEDULER_DEFAULT_CONFIG)
|
||||
scheduler_config = SCHEDULER_DEFAULT_CONFIG
|
||||
model_type = infer_diffusers_model_type(checkpoint=checkpoint)
|
||||
|
||||
global_step = checkpoint["global_step"] if "global_step" in checkpoint else None
|
||||
|
||||
@@ -336,7 +336,7 @@ class SD3ControlNetModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
|
||||
return custom_forward
|
||||
|
||||
ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
|
||||
encoder_hidden_states, hidden_states = torch.utils.checkpoint.checkpoint(
|
||||
hidden_states = torch.utils.checkpoint.checkpoint(
|
||||
create_custom_forward(block),
|
||||
hidden_states,
|
||||
encoder_hidden_states,
|
||||
|
||||
@@ -93,20 +93,24 @@ def get_parameter_device(parameter: torch.nn.Module) -> torch.device:
|
||||
|
||||
def get_parameter_dtype(parameter: torch.nn.Module) -> torch.dtype:
|
||||
try:
|
||||
return next(parameter.parameters()).dtype
|
||||
params = tuple(parameter.parameters())
|
||||
if len(params) > 0:
|
||||
return params[0].dtype
|
||||
|
||||
buffers = tuple(parameter.buffers())
|
||||
if len(buffers) > 0:
|
||||
return buffers[0].dtype
|
||||
|
||||
except StopIteration:
|
||||
try:
|
||||
return next(parameter.buffers()).dtype
|
||||
except StopIteration:
|
||||
# For torch.nn.DataParallel compatibility in PyTorch 1.5
|
||||
# For torch.nn.DataParallel compatibility in PyTorch 1.5
|
||||
|
||||
def find_tensor_attributes(module: torch.nn.Module) -> List[Tuple[str, Tensor]]:
|
||||
tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)]
|
||||
return tuples
|
||||
def find_tensor_attributes(module: torch.nn.Module) -> List[Tuple[str, Tensor]]:
|
||||
tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)]
|
||||
return tuples
|
||||
|
||||
gen = parameter._named_members(get_members_fn=find_tensor_attributes)
|
||||
first_tuple = next(gen)
|
||||
return first_tuple[1].dtype
|
||||
gen = parameter._named_members(get_members_fn=find_tensor_attributes)
|
||||
first_tuple = next(gen)
|
||||
return first_tuple[1].dtype
|
||||
|
||||
|
||||
class ModelMixin(torch.nn.Module, PushToHubMixin):
|
||||
|
||||
@@ -101,10 +101,10 @@ class DDPMPipeline(DiffusionPipeline):
|
||||
|
||||
if self.device.type == "mps":
|
||||
# randn does not work reproducibly on mps
|
||||
image = randn_tensor(image_shape, generator=generator, dtype=self.unet.dtype)
|
||||
image = randn_tensor(image_shape, generator=generator)
|
||||
image = image.to(self.device)
|
||||
else:
|
||||
image = randn_tensor(image_shape, generator=generator, device=self.device, dtype=self.unet.dtype)
|
||||
image = randn_tensor(image_shape, generator=generator, device=self.device)
|
||||
|
||||
# set step values
|
||||
self.scheduler.set_timesteps(num_inference_steps)
|
||||
|
||||
@@ -111,8 +111,6 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
|
||||
the sigmas are determined according to a sequence of noise levels {σi}.
|
||||
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
|
||||
timestep_spacing (`str`, defaults to `"linspace"`):
|
||||
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
|
||||
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
|
||||
@@ -140,12 +138,9 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
solver_type: str = "logrho",
|
||||
lower_order_final: bool = True,
|
||||
use_karras_sigmas: Optional[bool] = False,
|
||||
use_exponential_sigmas: Optional[bool] = False,
|
||||
timestep_spacing: str = "linspace",
|
||||
steps_offset: int = 0,
|
||||
):
|
||||
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
|
||||
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
|
||||
if trained_betas is not None:
|
||||
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
||||
elif beta_schedule == "linear":
|
||||
@@ -260,9 +255,6 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
|
||||
sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32)
|
||||
elif self.config.use_exponential_sigmas:
|
||||
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
else:
|
||||
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
|
||||
sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5
|
||||
@@ -374,28 +366,6 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
|
||||
return sigmas
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
|
||||
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
|
||||
"""Constructs an exponential noise schedule."""
|
||||
|
||||
# Hack to make sure that other schedulers which copy this function don't break
|
||||
# TODO: Add this logic to the other schedulers
|
||||
if hasattr(self.config, "sigma_min"):
|
||||
sigma_min = self.config.sigma_min
|
||||
else:
|
||||
sigma_min = None
|
||||
|
||||
if hasattr(self.config, "sigma_max"):
|
||||
sigma_max = self.config.sigma_max
|
||||
else:
|
||||
sigma_max = None
|
||||
|
||||
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
||||
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
||||
|
||||
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
|
||||
return sigmas
|
||||
|
||||
def convert_model_output(
|
||||
self,
|
||||
model_output: torch.Tensor,
|
||||
|
||||
@@ -161,8 +161,6 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
|
||||
the sigmas are determined according to a sequence of noise levels {σi}.
|
||||
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
|
||||
use_lu_lambdas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use the uniform-logSNR for step sizes proposed by Lu's DPM-Solver in the noise schedule during
|
||||
the sampling process. If `True`, the sigmas and time steps are determined according to a sequence of
|
||||
@@ -208,7 +206,6 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
lower_order_final: bool = True,
|
||||
euler_at_final: bool = False,
|
||||
use_karras_sigmas: Optional[bool] = False,
|
||||
use_exponential_sigmas: Optional[bool] = False,
|
||||
use_lu_lambdas: Optional[bool] = False,
|
||||
final_sigmas_type: Optional[str] = "zero", # "zero", "sigma_min"
|
||||
lambda_min_clipped: float = -float("inf"),
|
||||
@@ -217,8 +214,6 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
steps_offset: int = 0,
|
||||
rescale_betas_zero_snr: bool = False,
|
||||
):
|
||||
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
|
||||
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
|
||||
if algorithm_type in ["dpmsolver", "sde-dpmsolver"]:
|
||||
deprecation_message = f"algorithm_type {algorithm_type} is deprecated and will be removed in a future version. Choose from `dpmsolver++` or `sde-dpmsolver++` instead"
|
||||
deprecate("algorithm_types dpmsolver and sde-dpmsolver", "1.0.0", deprecation_message)
|
||||
@@ -335,8 +330,6 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
raise ValueError("Cannot use `timesteps` with `config.use_karras_sigmas = True`")
|
||||
if timesteps is not None and self.config.use_lu_lambdas:
|
||||
raise ValueError("Cannot use `timesteps` with `config.use_lu_lambdas = True`")
|
||||
if timesteps is not None and self.config.use_exponential_sigmas:
|
||||
raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.")
|
||||
|
||||
if timesteps is not None:
|
||||
timesteps = np.array(timesteps).astype(np.int64)
|
||||
@@ -385,9 +378,6 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
lambdas = self._convert_to_lu(in_lambdas=lambdas, num_inference_steps=num_inference_steps)
|
||||
sigmas = np.exp(lambdas)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
|
||||
elif self.config.use_exponential_sigmas:
|
||||
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
else:
|
||||
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
|
||||
|
||||
@@ -520,28 +510,6 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
lambdas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
|
||||
return lambdas
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
|
||||
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
|
||||
"""Constructs an exponential noise schedule."""
|
||||
|
||||
# Hack to make sure that other schedulers which copy this function don't break
|
||||
# TODO: Add this logic to the other schedulers
|
||||
if hasattr(self.config, "sigma_min"):
|
||||
sigma_min = self.config.sigma_min
|
||||
else:
|
||||
sigma_min = None
|
||||
|
||||
if hasattr(self.config, "sigma_max"):
|
||||
sigma_max = self.config.sigma_max
|
||||
else:
|
||||
sigma_max = None
|
||||
|
||||
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
||||
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
||||
|
||||
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
|
||||
return sigmas
|
||||
|
||||
def convert_model_output(
|
||||
self,
|
||||
model_output: torch.Tensor,
|
||||
|
||||
@@ -124,8 +124,6 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
||||
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
|
||||
the sigmas are determined according to a sequence of noise levels {σi}.
|
||||
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
|
||||
lambda_min_clipped (`float`, defaults to `-inf`):
|
||||
Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the
|
||||
cosine (`squaredcos_cap_v2`) noise schedule.
|
||||
@@ -160,14 +158,11 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
||||
lower_order_final: bool = True,
|
||||
euler_at_final: bool = False,
|
||||
use_karras_sigmas: Optional[bool] = False,
|
||||
use_exponential_sigmas: Optional[bool] = False,
|
||||
lambda_min_clipped: float = -float("inf"),
|
||||
variance_type: Optional[str] = None,
|
||||
timestep_spacing: str = "linspace",
|
||||
steps_offset: int = 0,
|
||||
):
|
||||
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
|
||||
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
|
||||
if algorithm_type in ["dpmsolver", "sde-dpmsolver"]:
|
||||
deprecation_message = f"algorithm_type {algorithm_type} is deprecated and will be removed in a future version. Choose from `dpmsolver++` or `sde-dpmsolver++` instead"
|
||||
deprecate("algorithm_types dpmsolver and sde-dpmsolver", "1.0.0", deprecation_message)
|
||||
@@ -218,7 +213,6 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
||||
self._step_index = None
|
||||
self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
|
||||
self.use_karras_sigmas = use_karras_sigmas
|
||||
self.use_exponential_sigmas = use_exponential_sigmas
|
||||
|
||||
@property
|
||||
def step_index(self):
|
||||
@@ -273,9 +267,6 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
|
||||
timesteps = timesteps.copy().astype(np.int64)
|
||||
sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32)
|
||||
elif self.config.use_exponential_sigmas:
|
||||
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
else:
|
||||
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
|
||||
sigma_max = (
|
||||
@@ -394,28 +385,6 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
|
||||
return sigmas
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
|
||||
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
|
||||
"""Constructs an exponential noise schedule."""
|
||||
|
||||
# Hack to make sure that other schedulers which copy this function don't break
|
||||
# TODO: Add this logic to the other schedulers
|
||||
if hasattr(self.config, "sigma_min"):
|
||||
sigma_min = self.config.sigma_min
|
||||
else:
|
||||
sigma_min = None
|
||||
|
||||
if hasattr(self.config, "sigma_max"):
|
||||
sigma_max = self.config.sigma_max
|
||||
else:
|
||||
sigma_max = None
|
||||
|
||||
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
||||
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
||||
|
||||
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
|
||||
return sigmas
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.convert_model_output
|
||||
def convert_model_output(
|
||||
self,
|
||||
|
||||
@@ -160,8 +160,6 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
|
||||
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
|
||||
the sigmas are determined according to a sequence of noise levels {σi}.
|
||||
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
|
||||
noise_sampler_seed (`int`, *optional*, defaults to `None`):
|
||||
The random seed to use for the noise sampler. If `None`, a random seed is generated.
|
||||
timestep_spacing (`str`, defaults to `"linspace"`):
|
||||
@@ -184,13 +182,10 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
|
||||
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
|
||||
prediction_type: str = "epsilon",
|
||||
use_karras_sigmas: Optional[bool] = False,
|
||||
use_exponential_sigmas: Optional[bool] = False,
|
||||
noise_sampler_seed: Optional[int] = None,
|
||||
timestep_spacing: str = "linspace",
|
||||
steps_offset: int = 0,
|
||||
):
|
||||
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
|
||||
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
|
||||
if trained_betas is not None:
|
||||
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
||||
elif beta_schedule == "linear":
|
||||
@@ -346,9 +341,6 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
|
||||
if self.config.use_karras_sigmas:
|
||||
sigmas = self._convert_to_karras(in_sigmas=sigmas)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
elif self.config.use_exponential_sigmas:
|
||||
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
|
||||
second_order_timesteps = self._second_order_timesteps(sigmas, log_sigmas)
|
||||
|
||||
@@ -429,28 +421,6 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
|
||||
return sigmas
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
|
||||
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
|
||||
"""Constructs an exponential noise schedule."""
|
||||
|
||||
# Hack to make sure that other schedulers which copy this function don't break
|
||||
# TODO: Add this logic to the other schedulers
|
||||
if hasattr(self.config, "sigma_min"):
|
||||
sigma_min = self.config.sigma_min
|
||||
else:
|
||||
sigma_min = None
|
||||
|
||||
if hasattr(self.config, "sigma_max"):
|
||||
sigma_max = self.config.sigma_max
|
||||
else:
|
||||
sigma_max = None
|
||||
|
||||
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
||||
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
||||
|
||||
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
|
||||
return sigmas
|
||||
|
||||
@property
|
||||
def state_in_first_order(self):
|
||||
return self.sample is None
|
||||
|
||||
@@ -123,8 +123,6 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
||||
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
|
||||
the sigmas are determined according to a sequence of noise levels {σi}.
|
||||
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
|
||||
final_sigmas_type (`str`, *optional*, defaults to `"zero"`):
|
||||
The final `sigma` value for the noise schedule during the sampling process. If `"sigma_min"`, the final
|
||||
sigma is the same as the last sigma in the training schedule. If `zero`, the final sigma is set to 0.
|
||||
@@ -156,13 +154,10 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
||||
solver_type: str = "midpoint",
|
||||
lower_order_final: bool = False,
|
||||
use_karras_sigmas: Optional[bool] = False,
|
||||
use_exponential_sigmas: Optional[bool] = False,
|
||||
final_sigmas_type: Optional[str] = "zero", # "zero", "sigma_min"
|
||||
lambda_min_clipped: float = -float("inf"),
|
||||
variance_type: Optional[str] = None,
|
||||
):
|
||||
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
|
||||
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
|
||||
if algorithm_type == "dpmsolver":
|
||||
deprecation_message = "algorithm_type `dpmsolver` is deprecated and will be removed in a future version. Choose from `dpmsolver++` or `sde-dpmsolver++` instead"
|
||||
deprecate("algorithm_types=dpmsolver", "1.0.0", deprecation_message)
|
||||
@@ -305,8 +300,6 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
||||
raise ValueError("Must pass exactly one of `num_inference_steps` or `timesteps`.")
|
||||
if timesteps is not None and self.config.use_karras_sigmas:
|
||||
raise ValueError("Cannot use `timesteps` when `config.use_karras_sigmas=True`.")
|
||||
if timesteps is not None and self.config.use_exponential_sigmas:
|
||||
raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.")
|
||||
|
||||
num_inference_steps = num_inference_steps or len(timesteps)
|
||||
self.num_inference_steps = num_inference_steps
|
||||
@@ -330,9 +323,6 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = np.flip(sigmas).copy()
|
||||
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
|
||||
elif self.config.use_exponential_sigmas:
|
||||
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
else:
|
||||
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
|
||||
|
||||
@@ -462,28 +452,6 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
|
||||
return sigmas
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
|
||||
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
|
||||
"""Constructs an exponential noise schedule."""
|
||||
|
||||
# Hack to make sure that other schedulers which copy this function don't break
|
||||
# TODO: Add this logic to the other schedulers
|
||||
if hasattr(self.config, "sigma_min"):
|
||||
sigma_min = self.config.sigma_min
|
||||
else:
|
||||
sigma_min = None
|
||||
|
||||
if hasattr(self.config, "sigma_max"):
|
||||
sigma_max = self.config.sigma_max
|
||||
else:
|
||||
sigma_max = None
|
||||
|
||||
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
||||
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
||||
|
||||
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
|
||||
return sigmas
|
||||
|
||||
def convert_model_output(
|
||||
self,
|
||||
model_output: torch.Tensor,
|
||||
|
||||
@@ -158,8 +158,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
|
||||
the sigmas are determined according to a sequence of noise levels {σi}.
|
||||
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
|
||||
timestep_spacing (`str`, defaults to `"linspace"`):
|
||||
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
|
||||
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
|
||||
@@ -188,7 +186,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
prediction_type: str = "epsilon",
|
||||
interpolation_type: str = "linear",
|
||||
use_karras_sigmas: Optional[bool] = False,
|
||||
use_exponential_sigmas: Optional[bool] = False,
|
||||
sigma_min: Optional[float] = None,
|
||||
sigma_max: Optional[float] = None,
|
||||
timestep_spacing: str = "linspace",
|
||||
@@ -197,8 +194,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
rescale_betas_zero_snr: bool = False,
|
||||
final_sigmas_type: str = "zero", # can be "zero" or "sigma_min"
|
||||
):
|
||||
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
|
||||
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
|
||||
if trained_betas is not None:
|
||||
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
||||
elif beta_schedule == "linear":
|
||||
@@ -240,7 +235,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
|
||||
self.is_scale_input_called = False
|
||||
self.use_karras_sigmas = use_karras_sigmas
|
||||
self.use_exponential_sigmas = use_exponential_sigmas
|
||||
|
||||
self._step_index = None
|
||||
self._begin_index = None
|
||||
@@ -338,8 +332,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
raise ValueError("Can only pass one of `num_inference_steps` or `timesteps` or `sigmas`.")
|
||||
if timesteps is not None and self.config.use_karras_sigmas:
|
||||
raise ValueError("Cannot set `timesteps` with `config.use_karras_sigmas = True`.")
|
||||
if timesteps is not None and self.config.use_exponential_sigmas:
|
||||
raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.")
|
||||
if (
|
||||
timesteps is not None
|
||||
and self.config.timestep_type == "continuous"
|
||||
@@ -404,10 +396,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
|
||||
elif self.config.use_exponential_sigmas:
|
||||
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
|
||||
if self.config.final_sigmas_type == "sigma_min":
|
||||
sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5
|
||||
elif self.config.final_sigmas_type == "zero":
|
||||
@@ -480,28 +468,6 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
|
||||
return sigmas
|
||||
|
||||
# Copied from https://github.com/crowsonkb/k-diffusion/blob/686dbad0f39640ea25c8a8c6a6e56bb40eacefa2/k_diffusion/sampling.py#L26
|
||||
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
|
||||
"""Constructs an exponential noise schedule."""
|
||||
|
||||
# Hack to make sure that other schedulers which copy this function don't break
|
||||
# TODO: Add this logic to the other schedulers
|
||||
if hasattr(self.config, "sigma_min"):
|
||||
sigma_min = self.config.sigma_min
|
||||
else:
|
||||
sigma_min = None
|
||||
|
||||
if hasattr(self.config, "sigma_max"):
|
||||
sigma_max = self.config.sigma_max
|
||||
else:
|
||||
sigma_max = None
|
||||
|
||||
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
||||
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
||||
|
||||
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
|
||||
return sigmas
|
||||
|
||||
def index_for_timestep(self, timestep, schedule_timesteps=None):
|
||||
if schedule_timesteps is None:
|
||||
schedule_timesteps = self.timesteps
|
||||
|
||||
@@ -97,8 +97,6 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
|
||||
the sigmas are determined according to a sequence of noise levels {σi}.
|
||||
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
|
||||
timestep_spacing (`str`, defaults to `"linspace"`):
|
||||
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
|
||||
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
|
||||
@@ -119,14 +117,11 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
|
||||
prediction_type: str = "epsilon",
|
||||
use_karras_sigmas: Optional[bool] = False,
|
||||
use_exponential_sigmas: Optional[bool] = False,
|
||||
clip_sample: Optional[bool] = False,
|
||||
clip_sample_range: float = 1.0,
|
||||
timestep_spacing: str = "linspace",
|
||||
steps_offset: int = 0,
|
||||
):
|
||||
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
|
||||
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
|
||||
if trained_betas is not None:
|
||||
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
||||
elif beta_schedule == "linear":
|
||||
@@ -256,8 +251,6 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
raise ValueError("Can only pass one of `num_inference_steps` or `custom_timesteps`.")
|
||||
if timesteps is not None and self.config.use_karras_sigmas:
|
||||
raise ValueError("Cannot use `timesteps` with `config.use_karras_sigmas = True`")
|
||||
if timesteps is not None and self.config.use_exponential_sigmas:
|
||||
raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.")
|
||||
|
||||
num_inference_steps = num_inference_steps or len(timesteps)
|
||||
self.num_inference_steps = num_inference_steps
|
||||
@@ -293,9 +286,6 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
if self.config.use_karras_sigmas:
|
||||
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
elif self.config.use_exponential_sigmas:
|
||||
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
|
||||
sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
|
||||
sigmas = torch.from_numpy(sigmas).to(device=device)
|
||||
@@ -364,28 +354,6 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
|
||||
return sigmas
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
|
||||
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
|
||||
"""Constructs an exponential noise schedule."""
|
||||
|
||||
# Hack to make sure that other schedulers which copy this function don't break
|
||||
# TODO: Add this logic to the other schedulers
|
||||
if hasattr(self.config, "sigma_min"):
|
||||
sigma_min = self.config.sigma_min
|
||||
else:
|
||||
sigma_min = None
|
||||
|
||||
if hasattr(self.config, "sigma_max"):
|
||||
sigma_max = self.config.sigma_max
|
||||
else:
|
||||
sigma_max = None
|
||||
|
||||
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
||||
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
||||
|
||||
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
|
||||
return sigmas
|
||||
|
||||
@property
|
||||
def state_in_first_order(self):
|
||||
return self.dt is None
|
||||
|
||||
@@ -91,8 +91,6 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
|
||||
the sigmas are determined according to a sequence of noise levels {σi}.
|
||||
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
|
||||
prediction_type (`str`, defaults to `epsilon`, *optional*):
|
||||
Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
|
||||
`sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
|
||||
@@ -116,13 +114,10 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
beta_schedule: str = "linear",
|
||||
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
|
||||
use_karras_sigmas: Optional[bool] = False,
|
||||
use_exponential_sigmas: Optional[bool] = False,
|
||||
prediction_type: str = "epsilon",
|
||||
timestep_spacing: str = "linspace",
|
||||
steps_offset: int = 0,
|
||||
):
|
||||
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
|
||||
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
|
||||
if trained_betas is not None:
|
||||
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
||||
elif beta_schedule == "linear":
|
||||
@@ -255,9 +250,6 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
if self.config.use_karras_sigmas:
|
||||
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
|
||||
elif self.config.use_exponential_sigmas:
|
||||
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
|
||||
self.log_sigmas = torch.from_numpy(log_sigmas).to(device)
|
||||
sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
|
||||
@@ -354,28 +346,6 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
|
||||
return sigmas
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
|
||||
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
|
||||
"""Constructs an exponential noise schedule."""
|
||||
|
||||
# Hack to make sure that other schedulers which copy this function don't break
|
||||
# TODO: Add this logic to the other schedulers
|
||||
if hasattr(self.config, "sigma_min"):
|
||||
sigma_min = self.config.sigma_min
|
||||
else:
|
||||
sigma_min = None
|
||||
|
||||
if hasattr(self.config, "sigma_max"):
|
||||
sigma_max = self.config.sigma_max
|
||||
else:
|
||||
sigma_max = None
|
||||
|
||||
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
||||
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
||||
|
||||
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
|
||||
return sigmas
|
||||
|
||||
@property
|
||||
def state_in_first_order(self):
|
||||
return self.sample is None
|
||||
|
||||
@@ -90,8 +90,6 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
|
||||
the sigmas are determined according to a sequence of noise levels {σi}.
|
||||
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
|
||||
prediction_type (`str`, defaults to `epsilon`, *optional*):
|
||||
Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
|
||||
`sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
|
||||
@@ -115,13 +113,10 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
beta_schedule: str = "linear",
|
||||
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
|
||||
use_karras_sigmas: Optional[bool] = False,
|
||||
use_exponential_sigmas: Optional[bool] = False,
|
||||
prediction_type: str = "epsilon",
|
||||
timestep_spacing: str = "linspace",
|
||||
steps_offset: int = 0,
|
||||
):
|
||||
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
|
||||
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
|
||||
if trained_betas is not None:
|
||||
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
||||
elif beta_schedule == "linear":
|
||||
@@ -254,9 +249,6 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
if self.config.use_karras_sigmas:
|
||||
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
|
||||
elif self.config.use_exponential_sigmas:
|
||||
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
|
||||
self.log_sigmas = torch.from_numpy(log_sigmas).to(device=device)
|
||||
sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
|
||||
@@ -367,28 +359,6 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
|
||||
return sigmas
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
|
||||
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
|
||||
"""Constructs an exponential noise schedule."""
|
||||
|
||||
# Hack to make sure that other schedulers which copy this function don't break
|
||||
# TODO: Add this logic to the other schedulers
|
||||
if hasattr(self.config, "sigma_min"):
|
||||
sigma_min = self.config.sigma_min
|
||||
else:
|
||||
sigma_min = None
|
||||
|
||||
if hasattr(self.config, "sigma_max"):
|
||||
sigma_max = self.config.sigma_max
|
||||
else:
|
||||
sigma_max = None
|
||||
|
||||
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
||||
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
||||
|
||||
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
|
||||
return sigmas
|
||||
|
||||
def step(
|
||||
self,
|
||||
model_output: Union[torch.Tensor, np.ndarray],
|
||||
|
||||
@@ -111,8 +111,6 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
|
||||
the sigmas are determined according to a sequence of noise levels {σi}.
|
||||
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
|
||||
prediction_type (`str`, defaults to `epsilon`, *optional*):
|
||||
Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
|
||||
`sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
|
||||
@@ -136,13 +134,10 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
beta_schedule: str = "linear",
|
||||
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
|
||||
use_karras_sigmas: Optional[bool] = False,
|
||||
use_exponential_sigmas: Optional[bool] = False,
|
||||
prediction_type: str = "epsilon",
|
||||
timestep_spacing: str = "linspace",
|
||||
steps_offset: int = 0,
|
||||
):
|
||||
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
|
||||
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
|
||||
if trained_betas is not None:
|
||||
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
||||
elif beta_schedule == "linear":
|
||||
@@ -294,9 +289,6 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
if self.config.use_karras_sigmas:
|
||||
sigmas = self._convert_to_karras(in_sigmas=sigmas)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
elif self.config.use_exponential_sigmas:
|
||||
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
|
||||
sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
|
||||
|
||||
@@ -370,28 +362,6 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
|
||||
return sigmas
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
|
||||
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
|
||||
"""Constructs an exponential noise schedule."""
|
||||
|
||||
# Hack to make sure that other schedulers which copy this function don't break
|
||||
# TODO: Add this logic to the other schedulers
|
||||
if hasattr(self.config, "sigma_min"):
|
||||
sigma_min = self.config.sigma_min
|
||||
else:
|
||||
sigma_min = None
|
||||
|
||||
if hasattr(self.config, "sigma_max"):
|
||||
sigma_max = self.config.sigma_max
|
||||
else:
|
||||
sigma_max = None
|
||||
|
||||
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
||||
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
||||
|
||||
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
|
||||
return sigmas
|
||||
|
||||
def step(
|
||||
self,
|
||||
model_output: torch.Tensor,
|
||||
|
||||
@@ -122,8 +122,6 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
|
||||
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
|
||||
the sigmas are determined according to a sequence of noise levels {σi}.
|
||||
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
|
||||
lambda_min_clipped (`float`, defaults to `-inf`):
|
||||
Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the
|
||||
cosine (`squaredcos_cap_v2`) noise schedule.
|
||||
@@ -158,14 +156,11 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
|
||||
algorithm_type: str = "data_prediction",
|
||||
lower_order_final: bool = True,
|
||||
use_karras_sigmas: Optional[bool] = False,
|
||||
use_exponential_sigmas: Optional[bool] = False,
|
||||
lambda_min_clipped: float = -float("inf"),
|
||||
variance_type: Optional[str] = None,
|
||||
timestep_spacing: str = "linspace",
|
||||
steps_offset: int = 0,
|
||||
):
|
||||
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
|
||||
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
|
||||
if trained_betas is not None:
|
||||
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
||||
elif beta_schedule == "linear":
|
||||
@@ -289,9 +284,6 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
|
||||
sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32)
|
||||
elif self.config.use_exponential_sigmas:
|
||||
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
else:
|
||||
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
|
||||
sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5
|
||||
@@ -403,28 +395,6 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
|
||||
return sigmas
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
|
||||
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
|
||||
"""Constructs an exponential noise schedule."""
|
||||
|
||||
# Hack to make sure that other schedulers which copy this function don't break
|
||||
# TODO: Add this logic to the other schedulers
|
||||
if hasattr(self.config, "sigma_min"):
|
||||
sigma_min = self.config.sigma_min
|
||||
else:
|
||||
sigma_min = None
|
||||
|
||||
if hasattr(self.config, "sigma_max"):
|
||||
sigma_max = self.config.sigma_max
|
||||
else:
|
||||
sigma_max = None
|
||||
|
||||
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
||||
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
||||
|
||||
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
|
||||
return sigmas
|
||||
|
||||
def convert_model_output(
|
||||
self,
|
||||
model_output: torch.Tensor,
|
||||
|
||||
@@ -159,8 +159,6 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
|
||||
the sigmas are determined according to a sequence of noise levels {σi}.
|
||||
use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
|
||||
Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
|
||||
timestep_spacing (`str`, defaults to `"linspace"`):
|
||||
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
|
||||
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
|
||||
@@ -197,14 +195,11 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
disable_corrector: List[int] = [],
|
||||
solver_p: SchedulerMixin = None,
|
||||
use_karras_sigmas: Optional[bool] = False,
|
||||
use_exponential_sigmas: Optional[bool] = False,
|
||||
timestep_spacing: str = "linspace",
|
||||
steps_offset: int = 0,
|
||||
final_sigmas_type: Optional[str] = "zero", # "zero", "sigma_min"
|
||||
rescale_betas_zero_snr: bool = False,
|
||||
):
|
||||
if sum([self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
|
||||
raise ValueError("Only one of `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used.")
|
||||
if trained_betas is not None:
|
||||
self.betas = torch.tensor(trained_betas, dtype=torch.float32)
|
||||
elif beta_schedule == "linear":
|
||||
@@ -334,9 +329,6 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}"
|
||||
)
|
||||
sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32)
|
||||
elif self.config.use_exponential_sigmas:
|
||||
sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps)
|
||||
timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
|
||||
else:
|
||||
sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
|
||||
if self.config.final_sigmas_type == "sigma_min":
|
||||
@@ -458,28 +450,6 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
||||
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
|
||||
return sigmas
|
||||
|
||||
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
|
||||
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
|
||||
"""Constructs an exponential noise schedule."""
|
||||
|
||||
# Hack to make sure that other schedulers which copy this function don't break
|
||||
# TODO: Add this logic to the other schedulers
|
||||
if hasattr(self.config, "sigma_min"):
|
||||
sigma_min = self.config.sigma_min
|
||||
else:
|
||||
sigma_min = None
|
||||
|
||||
if hasattr(self.config, "sigma_max"):
|
||||
sigma_max = self.config.sigma_max
|
||||
else:
|
||||
sigma_max = None
|
||||
|
||||
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
||||
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
||||
|
||||
sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
|
||||
return sigmas
|
||||
|
||||
def convert_model_output(
|
||||
self,
|
||||
model_output: torch.Tensor,
|
||||
|
||||
@@ -102,7 +102,7 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
def test_integration_move_lora_cpu(self):
|
||||
path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
path = "Jiali/stable-diffusion-1.5"
|
||||
lora_id = "takuma104/lora-test-text-encoder-lora-target"
|
||||
|
||||
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
|
||||
@@ -161,7 +161,7 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
|
||||
def test_integration_move_lora_dora_cpu(self):
|
||||
from peft import LoraConfig
|
||||
|
||||
path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
path = "Jiali/stable-diffusion-1.5"
|
||||
unet_lora_config = LoraConfig(
|
||||
init_lora_weights="gaussian",
|
||||
target_modules=["to_k", "to_q", "to_v", "to_out.0"],
|
||||
@@ -221,7 +221,7 @@ class LoraIntegrationTests(unittest.TestCase):
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
def test_integration_logits_with_scale(self):
|
||||
path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
path = "Jiali/stable-diffusion-1.5"
|
||||
lora_id = "takuma104/lora-test-text-encoder-lora-target"
|
||||
|
||||
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32)
|
||||
@@ -253,7 +253,7 @@ class LoraIntegrationTests(unittest.TestCase):
|
||||
release_memory(pipe)
|
||||
|
||||
def test_integration_logits_no_scale(self):
|
||||
path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
path = "Jiali/stable-diffusion-1.5"
|
||||
lora_id = "takuma104/lora-test-text-encoder-lora-target"
|
||||
|
||||
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32)
|
||||
@@ -284,7 +284,7 @@ class LoraIntegrationTests(unittest.TestCase):
|
||||
|
||||
lora_model_id = "hf-internal-testing/lora_dreambooth_dog_example"
|
||||
|
||||
base_model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
base_model_id = "Jiali/stable-diffusion-1.5"
|
||||
|
||||
pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
|
||||
pipe = pipe.to(torch_device)
|
||||
@@ -308,7 +308,7 @@ class LoraIntegrationTests(unittest.TestCase):
|
||||
|
||||
lora_model_id = "hf-internal-testing/lora-trained"
|
||||
|
||||
base_model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
base_model_id = "Jiali/stable-diffusion-1.5"
|
||||
|
||||
pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
|
||||
pipe = pipe.to(torch_device)
|
||||
@@ -419,9 +419,9 @@ class LoraIntegrationTests(unittest.TestCase):
|
||||
def test_kohya_sd_v15_with_higher_dimensions(self):
|
||||
generator = torch.Generator().manual_seed(0)
|
||||
|
||||
pipe = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
|
||||
).to(torch_device)
|
||||
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None).to(
|
||||
torch_device
|
||||
)
|
||||
lora_model_id = "hf-internal-testing/urushisato-lora"
|
||||
lora_filename = "urushisato_v15.safetensors"
|
||||
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
|
||||
@@ -444,7 +444,7 @@ class LoraIntegrationTests(unittest.TestCase):
|
||||
|
||||
lora_model_id = "hf-internal-testing/sd-model-finetuned-lora-t4"
|
||||
|
||||
base_model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
base_model_id = "Jiali/stable-diffusion-1.5"
|
||||
|
||||
pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
|
||||
pipe = pipe.to(torch_device)
|
||||
@@ -467,9 +467,9 @@ class LoraIntegrationTests(unittest.TestCase):
|
||||
prompt = "masterpiece, best quality, mountain"
|
||||
num_inference_steps = 2
|
||||
|
||||
pipe = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
|
||||
).to(torch_device)
|
||||
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None).to(
|
||||
torch_device
|
||||
)
|
||||
initial_images = pipe(
|
||||
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
|
||||
).images
|
||||
@@ -505,9 +505,9 @@ class LoraIntegrationTests(unittest.TestCase):
|
||||
prompt = "masterpiece, best quality, mountain"
|
||||
num_inference_steps = 2
|
||||
|
||||
pipe = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
|
||||
).to(torch_device)
|
||||
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None).to(
|
||||
torch_device
|
||||
)
|
||||
initial_images = pipe(
|
||||
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
|
||||
).images
|
||||
@@ -547,9 +547,9 @@ class LoraIntegrationTests(unittest.TestCase):
|
||||
|
||||
def test_not_empty_state_dict(self):
|
||||
# Makes sure https://github.com/huggingface/diffusers/issues/7054 does not happen again
|
||||
pipe = AutoPipelineForText2Image.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
|
||||
).to(torch_device)
|
||||
pipe = AutoPipelineForText2Image.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16).to(
|
||||
torch_device
|
||||
)
|
||||
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
|
||||
|
||||
cached_file = hf_hub_download("hf-internal-testing/lcm-lora-test-sd-v1-5", "test_lora.safetensors")
|
||||
@@ -561,9 +561,9 @@ class LoraIntegrationTests(unittest.TestCase):
|
||||
|
||||
def test_load_unload_load_state_dict(self):
|
||||
# Makes sure https://github.com/huggingface/diffusers/issues/7054 does not happen again
|
||||
pipe = AutoPipelineForText2Image.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
|
||||
).to(torch_device)
|
||||
pipe = AutoPipelineForText2Image.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16).to(
|
||||
torch_device
|
||||
)
|
||||
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
|
||||
|
||||
cached_file = hf_hub_download("hf-internal-testing/lcm-lora-test-sd-v1-5", "test_lora.safetensors")
|
||||
@@ -580,9 +580,7 @@ class LoraIntegrationTests(unittest.TestCase):
|
||||
release_memory(pipe)
|
||||
|
||||
def test_sdv1_5_lcm_lora(self):
|
||||
pipe = DiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
|
||||
)
|
||||
pipe = DiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16)
|
||||
pipe.to(torch_device)
|
||||
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
|
||||
|
||||
@@ -610,9 +608,7 @@ class LoraIntegrationTests(unittest.TestCase):
|
||||
release_memory(pipe)
|
||||
|
||||
def test_sdv1_5_lcm_lora_img2img(self):
|
||||
pipe = AutoPipelineForImage2Image.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
|
||||
)
|
||||
pipe = AutoPipelineForImage2Image.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16)
|
||||
pipe.to(torch_device)
|
||||
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
|
||||
|
||||
@@ -653,7 +649,7 @@ class LoraIntegrationTests(unittest.TestCase):
|
||||
This test simply checks that loading a LoRA with an empty network alpha works fine
|
||||
See: https://github.com/huggingface/diffusers/issues/5606
|
||||
"""
|
||||
pipeline = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
|
||||
pipeline = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
|
||||
pipeline.enable_sequential_cpu_offload()
|
||||
civitai_path = hf_hub_download("ybelkada/test-ahi-civitai", "ahi_lora_weights.safetensors")
|
||||
pipeline.load_lora_weights(civitai_path, adapter_name="ahri")
|
||||
|
||||
@@ -1051,9 +1051,7 @@ class ConsistencyDecoderVAEIntegrationTests(unittest.TestCase):
|
||||
|
||||
def test_sd(self):
|
||||
vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder") # TODO - update
|
||||
pipe = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", vae=vae, safety_checker=None
|
||||
)
|
||||
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", vae=vae, safety_checker=None)
|
||||
pipe.to(torch_device)
|
||||
|
||||
out = pipe(
|
||||
@@ -1101,7 +1099,7 @@ class ConsistencyDecoderVAEIntegrationTests(unittest.TestCase):
|
||||
"openai/consistency-decoder", torch_dtype=torch.float16
|
||||
) # TODO - update
|
||||
pipe = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
"Jiali/stable-diffusion-1.5",
|
||||
torch_dtype=torch.float16,
|
||||
vae=vae,
|
||||
safety_checker=None,
|
||||
@@ -1126,7 +1124,7 @@ class ConsistencyDecoderVAEIntegrationTests(unittest.TestCase):
|
||||
def test_vae_tiling(self):
|
||||
vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder", torch_dtype=torch.float16)
|
||||
pipe = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", vae=vae, safety_checker=None, torch_dtype=torch.float16
|
||||
"Jiali/stable-diffusion-1.5", vae=vae, safety_checker=None, torch_dtype=torch.float16
|
||||
)
|
||||
pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
@@ -73,7 +73,7 @@ def _test_stable_diffusion_compile(in_queue, out_queue, timeout):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
|
||||
|
||||
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.to("cuda")
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -715,7 +715,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
|
||||
|
||||
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.enable_model_cpu_offload()
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -742,7 +742,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-depth")
|
||||
|
||||
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.enable_model_cpu_offload()
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -769,7 +769,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-hed")
|
||||
|
||||
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.enable_model_cpu_offload()
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -796,7 +796,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-mlsd")
|
||||
|
||||
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.enable_model_cpu_offload()
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -823,7 +823,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-normal")
|
||||
|
||||
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.enable_model_cpu_offload()
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -850,7 +850,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-openpose")
|
||||
|
||||
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.enable_model_cpu_offload()
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -877,7 +877,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-scribble")
|
||||
|
||||
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.enable_model_cpu_offload()
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -904,7 +904,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-seg")
|
||||
|
||||
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.enable_model_cpu_offload()
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -935,7 +935,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-seg")
|
||||
|
||||
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
pipe.enable_attention_slicing()
|
||||
@@ -961,7 +961,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
|
||||
|
||||
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.enable_model_cpu_offload()
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -993,7 +993,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
|
||||
|
||||
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
|
||||
pipe.enable_model_cpu_offload()
|
||||
@@ -1035,7 +1035,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11e_sd15_shuffle")
|
||||
|
||||
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.enable_model_cpu_offload()
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -1081,9 +1081,7 @@ class StableDiffusionMultiControlNetPipelineSlowTests(unittest.TestCase):
|
||||
controlnet_pose = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-openpose")
|
||||
|
||||
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
safety_checker=None,
|
||||
controlnet=[controlnet_pose, controlnet_canny],
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=[controlnet_pose, controlnet_canny]
|
||||
)
|
||||
pipe.enable_model_cpu_offload()
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
@@ -407,7 +407,7 @@ class ControlNetImg2ImgPipelineSlowTests(unittest.TestCase):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
|
||||
|
||||
pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.enable_model_cpu_offload()
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
@@ -504,7 +504,7 @@ class ControlNetInpaintPipelineSlowTests(unittest.TestCase):
|
||||
controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_inpaint")
|
||||
|
||||
pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, controlnet=controlnet
|
||||
)
|
||||
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
|
||||
pipe.enable_model_cpu_offload()
|
||||
|
||||
@@ -41,7 +41,7 @@ class FlaxControlNetPipelineIntegrationTests(unittest.TestCase):
|
||||
"lllyasviel/sd-controlnet-canny", from_pt=True, dtype=jnp.bfloat16
|
||||
)
|
||||
pipe, params = FlaxStableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", controlnet=controlnet, from_pt=True, dtype=jnp.bfloat16
|
||||
"Jiali/stable-diffusion-1.5", controlnet=controlnet, from_pt=True, dtype=jnp.bfloat16
|
||||
)
|
||||
params["controlnet"] = controlnet_params
|
||||
|
||||
@@ -86,7 +86,7 @@ class FlaxControlNetPipelineIntegrationTests(unittest.TestCase):
|
||||
"lllyasviel/sd-controlnet-openpose", from_pt=True, dtype=jnp.bfloat16
|
||||
)
|
||||
pipe, params = FlaxStableDiffusionControlNetPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", controlnet=controlnet, from_pt=True, dtype=jnp.bfloat16
|
||||
"Jiali/stable-diffusion-1.5", controlnet=controlnet, from_pt=True, dtype=jnp.bfloat16
|
||||
)
|
||||
params["controlnet"] = controlnet_params
|
||||
|
||||
|
||||
@@ -170,10 +170,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
|
||||
def test_text_to_image(self):
|
||||
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
|
||||
pipeline = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
image_encoder=image_encoder,
|
||||
safety_checker=None,
|
||||
torch_dtype=self.dtype,
|
||||
"Jiali/stable-diffusion-1.5", image_encoder=image_encoder, safety_checker=None, torch_dtype=self.dtype
|
||||
)
|
||||
pipeline.to(torch_device)
|
||||
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
|
||||
@@ -203,10 +200,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
|
||||
def test_image_to_image(self):
|
||||
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
|
||||
pipeline = StableDiffusionImg2ImgPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
image_encoder=image_encoder,
|
||||
safety_checker=None,
|
||||
torch_dtype=self.dtype,
|
||||
"Jiali/stable-diffusion-1.5", image_encoder=image_encoder, safety_checker=None, torch_dtype=self.dtype
|
||||
)
|
||||
pipeline.to(torch_device)
|
||||
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
|
||||
@@ -238,10 +232,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
|
||||
def test_inpainting(self):
|
||||
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
|
||||
pipeline = StableDiffusionInpaintPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
image_encoder=image_encoder,
|
||||
safety_checker=None,
|
||||
torch_dtype=self.dtype,
|
||||
"Jiali/stable-diffusion-1.5", image_encoder=image_encoder, safety_checker=None, torch_dtype=self.dtype
|
||||
)
|
||||
pipeline.to(torch_device)
|
||||
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
|
||||
@@ -269,10 +260,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
|
||||
def test_text_to_image_model_cpu_offload(self):
|
||||
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
|
||||
pipeline = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
image_encoder=image_encoder,
|
||||
safety_checker=None,
|
||||
torch_dtype=self.dtype,
|
||||
"Jiali/stable-diffusion-1.5", image_encoder=image_encoder, safety_checker=None, torch_dtype=self.dtype
|
||||
)
|
||||
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
|
||||
pipeline.to(torch_device)
|
||||
@@ -299,10 +287,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
|
||||
def test_text_to_image_full_face(self):
|
||||
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
|
||||
pipeline = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
image_encoder=image_encoder,
|
||||
safety_checker=None,
|
||||
torch_dtype=self.dtype,
|
||||
"Jiali/stable-diffusion-1.5", image_encoder=image_encoder, safety_checker=None, torch_dtype=self.dtype
|
||||
)
|
||||
pipeline.to(torch_device)
|
||||
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-full-face_sd15.bin")
|
||||
@@ -319,10 +304,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
|
||||
def test_unload(self):
|
||||
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
|
||||
pipeline = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
image_encoder=image_encoder,
|
||||
safety_checker=None,
|
||||
torch_dtype=self.dtype,
|
||||
"Jiali/stable-diffusion-1.5", image_encoder=image_encoder, safety_checker=None, torch_dtype=self.dtype
|
||||
)
|
||||
before_processors = [attn_proc.__class__ for attn_proc in pipeline.unet.attn_processors.values()]
|
||||
pipeline.to(torch_device)
|
||||
@@ -341,10 +323,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
|
||||
def test_multi(self):
|
||||
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
|
||||
pipeline = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
image_encoder=image_encoder,
|
||||
safety_checker=None,
|
||||
torch_dtype=self.dtype,
|
||||
"Jiali/stable-diffusion-1.5", image_encoder=image_encoder, safety_checker=None, torch_dtype=self.dtype
|
||||
)
|
||||
pipeline.to(torch_device)
|
||||
pipeline.load_ip_adapter(
|
||||
@@ -364,7 +343,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
|
||||
|
||||
def test_text_to_image_face_id(self):
|
||||
pipeline = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, torch_dtype=self.dtype
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, torch_dtype=self.dtype
|
||||
)
|
||||
pipeline.to(torch_device)
|
||||
pipeline.load_ip_adapter(
|
||||
|
||||
@@ -224,7 +224,7 @@ class LEditsPPPipelineStableDiffusionSlowTests(unittest.TestCase):
|
||||
|
||||
def test_ledits_pp_editing(self):
|
||||
pipe = LEditsPPPipelineStableDiffusion.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, torch_dtype=torch.float16
|
||||
"Jiali/stable-diffusion-1.5", safety_checker=None, torch_dtype=torch.float16
|
||||
)
|
||||
pipe = pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
@@ -34,19 +34,19 @@ class LuminaText2ImgPipelinePipelineFastTests(unittest.TestCase, PipelineTesterM
|
||||
def get_dummy_components(self):
|
||||
torch.manual_seed(0)
|
||||
transformer = LuminaNextDiT2DModel(
|
||||
sample_size=4,
|
||||
sample_size=16,
|
||||
patch_size=2,
|
||||
in_channels=4,
|
||||
hidden_size=4,
|
||||
hidden_size=24,
|
||||
num_layers=2,
|
||||
num_attention_heads=1,
|
||||
num_attention_heads=3,
|
||||
num_kv_heads=1,
|
||||
multiple_of=16,
|
||||
ffn_dim_multiplier=None,
|
||||
norm_eps=1e-5,
|
||||
learn_sigma=True,
|
||||
qk_norm=True,
|
||||
cross_attention_dim=8,
|
||||
cross_attention_dim=32,
|
||||
scaling_factor=1.0,
|
||||
)
|
||||
torch.manual_seed(0)
|
||||
@@ -57,8 +57,8 @@ class LuminaText2ImgPipelinePipelineFastTests(unittest.TestCase, PipelineTesterM
|
||||
|
||||
torch.manual_seed(0)
|
||||
config = GemmaConfig(
|
||||
head_dim=2,
|
||||
hidden_size=8,
|
||||
head_dim=4,
|
||||
hidden_size=32,
|
||||
intermediate_size=37,
|
||||
num_attention_heads=4,
|
||||
num_hidden_layers=2,
|
||||
|
||||
@@ -283,7 +283,7 @@ class StableDiffusionPAGPipelineFastTests(
|
||||
@require_torch_gpu
|
||||
class StableDiffusionPAGPipelineIntegrationTests(unittest.TestCase):
|
||||
pipeline_class = StableDiffusionPAGPipeline
|
||||
repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
repo_id = "Jiali/stable-diffusion-1.5"
|
||||
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
|
||||
@@ -287,7 +287,7 @@ class SemanticDiffusionPipelineIntegrationTests(unittest.TestCase):
|
||||
|
||||
def test_positive_guidance(self):
|
||||
torch_device = "cuda"
|
||||
pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
|
||||
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
|
||||
pipe = pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
@@ -370,7 +370,7 @@ class SemanticDiffusionPipelineIntegrationTests(unittest.TestCase):
|
||||
|
||||
def test_negative_guidance(self):
|
||||
torch_device = "cuda"
|
||||
pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
|
||||
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
|
||||
pipe = pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
@@ -453,7 +453,7 @@ class SemanticDiffusionPipelineIntegrationTests(unittest.TestCase):
|
||||
|
||||
def test_multi_cond_guidance(self):
|
||||
torch_device = "cuda"
|
||||
pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
|
||||
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
|
||||
pipe = pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
@@ -536,9 +536,7 @@ class SemanticDiffusionPipelineIntegrationTests(unittest.TestCase):
|
||||
|
||||
def test_guidance_fp16(self):
|
||||
torch_device = "cuda"
|
||||
pipe = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
|
||||
)
|
||||
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16)
|
||||
pipe = pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
|
||||
@@ -250,10 +250,10 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
|
||||
|
||||
def test_inference_ddim(self):
|
||||
ddim_scheduler = DDIMScheduler.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="scheduler", revision="onnx"
|
||||
"Jiali/stable-diffusion-1.5", subfolder="scheduler", revision="onnx"
|
||||
)
|
||||
sd_pipe = OnnxStableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
"Jiali/stable-diffusion-1.5",
|
||||
revision="onnx",
|
||||
scheduler=ddim_scheduler,
|
||||
safety_checker=None,
|
||||
@@ -276,10 +276,10 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
|
||||
|
||||
def test_inference_k_lms(self):
|
||||
lms_scheduler = LMSDiscreteScheduler.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="scheduler", revision="onnx"
|
||||
"Jiali/stable-diffusion-1.5", subfolder="scheduler", revision="onnx"
|
||||
)
|
||||
sd_pipe = OnnxStableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
"Jiali/stable-diffusion-1.5",
|
||||
revision="onnx",
|
||||
scheduler=lms_scheduler,
|
||||
safety_checker=None,
|
||||
@@ -327,7 +327,7 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
|
||||
test_callback_fn.has_been_called = False
|
||||
|
||||
pipe = OnnxStableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
"Jiali/stable-diffusion-1.5",
|
||||
revision="onnx",
|
||||
safety_checker=None,
|
||||
feature_extractor=None,
|
||||
@@ -352,7 +352,7 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
|
||||
|
||||
def test_stable_diffusion_no_safety_checker(self):
|
||||
pipe = OnnxStableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
"Jiali/stable-diffusion-1.5",
|
||||
revision="onnx",
|
||||
safety_checker=None,
|
||||
feature_extractor=None,
|
||||
|
||||
@@ -210,10 +210,10 @@ class OnnxStableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase):
|
||||
)
|
||||
init_image = init_image.resize((768, 512))
|
||||
lms_scheduler = LMSDiscreteScheduler.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="scheduler", revision="onnx"
|
||||
"Jiali/stable-diffusion-1.5", subfolder="scheduler", revision="onnx"
|
||||
)
|
||||
pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
"Jiali/stable-diffusion-1.5",
|
||||
revision="onnx",
|
||||
scheduler=lms_scheduler,
|
||||
safety_checker=None,
|
||||
|
||||
@@ -1332,7 +1332,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
|
||||
|
||||
def test_download_from_hub(self):
|
||||
ckpt_paths = [
|
||||
"https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors",
|
||||
"https://huggingface.co/Jiali/stable-diffusion-1.5/blob/main/v1-5-pruned-emaonly.safetensors",
|
||||
"https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix.safetensors",
|
||||
]
|
||||
|
||||
@@ -1346,10 +1346,8 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
|
||||
assert image_out.shape == (512, 512, 3)
|
||||
|
||||
def test_download_local(self):
|
||||
ckpt_filename = hf_hub_download(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", filename="v1-5-pruned-emaonly.safetensors"
|
||||
)
|
||||
config_filename = hf_hub_download("stable-diffusion-v1-5/stable-diffusion-v1-5", filename="v1-inference.yaml")
|
||||
ckpt_filename = hf_hub_download("Jiali/stable-diffusion-1.5", filename="v1-5-pruned-emaonly.safetensors")
|
||||
config_filename = hf_hub_download("Jiali/stable-diffusion-1.5", filename="v1-inference.yaml")
|
||||
|
||||
pipe = StableDiffusionPipeline.from_single_file(
|
||||
ckpt_filename, config_files={"v1": config_filename}, torch_dtype=torch.float16
|
||||
@@ -1404,9 +1402,7 @@ class StableDiffusionPipelineNightlyTests(unittest.TestCase):
|
||||
assert max_diff < 1e-3
|
||||
|
||||
def test_stable_diffusion_1_5_pndm(self):
|
||||
sd_pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5").to(
|
||||
torch_device
|
||||
)
|
||||
sd_pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5").to(torch_device)
|
||||
sd_pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
inputs = self.get_inputs(torch_device)
|
||||
@@ -1487,9 +1483,9 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
|
||||
return inputs
|
||||
|
||||
def get_pipeline_output_without_device_map(self):
|
||||
sd_pipe = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
|
||||
).to(torch_device)
|
||||
sd_pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16).to(
|
||||
torch_device
|
||||
)
|
||||
sd_pipe.set_progress_bar_config(disable=True)
|
||||
inputs = self.get_inputs()
|
||||
no_device_map_image = sd_pipe(**inputs).images
|
||||
@@ -1502,7 +1498,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
|
||||
no_device_map_image = self.get_pipeline_output_without_device_map()
|
||||
|
||||
sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16
|
||||
"Jiali/stable-diffusion-1.5", device_map="balanced", torch_dtype=torch.float16
|
||||
)
|
||||
sd_pipe_with_device_map.set_progress_bar_config(disable=True)
|
||||
inputs = self.get_inputs()
|
||||
@@ -1513,7 +1509,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
|
||||
|
||||
def test_components_put_in_right_devices(self):
|
||||
sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16
|
||||
"Jiali/stable-diffusion-1.5", device_map="balanced", torch_dtype=torch.float16
|
||||
)
|
||||
|
||||
assert len(set(sd_pipe_with_device_map.hf_device_map.values())) >= 2
|
||||
@@ -1522,7 +1518,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
|
||||
no_device_map_image = self.get_pipeline_output_without_device_map()
|
||||
|
||||
sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5",
|
||||
"Jiali/stable-diffusion-1.5",
|
||||
device_map="balanced",
|
||||
max_memory={0: "1GB", 1: "1GB"},
|
||||
torch_dtype=torch.float16,
|
||||
@@ -1536,7 +1532,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
|
||||
|
||||
def test_reset_device_map(self):
|
||||
sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16
|
||||
"Jiali/stable-diffusion-1.5", device_map="balanced", torch_dtype=torch.float16
|
||||
)
|
||||
sd_pipe_with_device_map.reset_device_map()
|
||||
|
||||
@@ -1548,7 +1544,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
|
||||
|
||||
def test_reset_device_map_to(self):
|
||||
sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16
|
||||
"Jiali/stable-diffusion-1.5", device_map="balanced", torch_dtype=torch.float16
|
||||
)
|
||||
sd_pipe_with_device_map.reset_device_map()
|
||||
|
||||
@@ -1560,7 +1556,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
|
||||
|
||||
def test_reset_device_map_enable_model_cpu_offload(self):
|
||||
sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16
|
||||
"Jiali/stable-diffusion-1.5", device_map="balanced", torch_dtype=torch.float16
|
||||
)
|
||||
sd_pipe_with_device_map.reset_device_map()
|
||||
|
||||
@@ -1572,7 +1568,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
|
||||
|
||||
def test_reset_device_map_enable_sequential_cpu_offload(self):
|
||||
sd_pipe_with_device_map = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", device_map="balanced", torch_dtype=torch.float16
|
||||
"Jiali/stable-diffusion-1.5", device_map="balanced", torch_dtype=torch.float16
|
||||
)
|
||||
sd_pipe_with_device_map.reset_device_map()
|
||||
|
||||
|
||||
@@ -566,7 +566,7 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
|
||||
assert module.device == torch.device("cpu")
|
||||
|
||||
def test_img2img_2nd_order(self):
|
||||
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
|
||||
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
|
||||
sd_pipe.scheduler = HeunDiscreteScheduler.from_config(sd_pipe.scheduler.config)
|
||||
sd_pipe.to(torch_device)
|
||||
sd_pipe.set_progress_bar_config(disable=None)
|
||||
@@ -630,7 +630,7 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
|
||||
assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3
|
||||
|
||||
def test_img2img_safety_checker_works(self):
|
||||
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
|
||||
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
|
||||
sd_pipe.to(torch_device)
|
||||
sd_pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
@@ -686,7 +686,7 @@ class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
|
||||
return inputs
|
||||
|
||||
def test_img2img_pndm(self):
|
||||
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
|
||||
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
|
||||
sd_pipe.to(torch_device)
|
||||
sd_pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
@@ -701,7 +701,7 @@ class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
|
||||
assert max_diff < 1e-3
|
||||
|
||||
def test_img2img_ddim(self):
|
||||
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
|
||||
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
|
||||
sd_pipe.scheduler = DDIMScheduler.from_config(sd_pipe.scheduler.config)
|
||||
sd_pipe.to(torch_device)
|
||||
sd_pipe.set_progress_bar_config(disable=None)
|
||||
@@ -717,7 +717,7 @@ class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
|
||||
assert max_diff < 1e-3
|
||||
|
||||
def test_img2img_lms(self):
|
||||
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
|
||||
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
|
||||
sd_pipe.scheduler = LMSDiscreteScheduler.from_config(sd_pipe.scheduler.config)
|
||||
sd_pipe.to(torch_device)
|
||||
sd_pipe.set_progress_bar_config(disable=None)
|
||||
@@ -733,7 +733,7 @@ class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
|
||||
assert max_diff < 1e-3
|
||||
|
||||
def test_img2img_dpm(self):
|
||||
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
|
||||
sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
|
||||
sd_pipe.scheduler = DPMSolverMultistepScheduler.from_config(sd_pipe.scheduler.config)
|
||||
sd_pipe.to(torch_device)
|
||||
sd_pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
@@ -767,9 +767,7 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
|
||||
assert np.abs(expected_slice - image_slice).max() < 1e-3
|
||||
|
||||
def test_stable_diffusion_simple_inpaint_ddim(self):
|
||||
pipe = StableDiffusionInpaintPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
|
||||
)
|
||||
pipe = StableDiffusionInpaintPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None)
|
||||
pipe.unet.set_default_attn_processor()
|
||||
pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
@@ -975,9 +973,7 @@ class StableDiffusionInpaintPipelineAsymmetricAutoencoderKLSlowTests(unittest.Te
|
||||
|
||||
def test_stable_diffusion_simple_inpaint_ddim(self):
|
||||
vae = AsymmetricAutoencoderKL.from_pretrained("cross-attention/asymmetric-autoencoder-kl-x-1-5")
|
||||
pipe = StableDiffusionInpaintPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
|
||||
)
|
||||
pipe = StableDiffusionInpaintPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None)
|
||||
pipe.vae = vae
|
||||
pipe.unet.set_default_attn_processor()
|
||||
pipe.to(torch_device)
|
||||
|
||||
@@ -609,7 +609,7 @@ class StableDiffusionAdapterPipelineSlowTests(unittest.TestCase):
|
||||
|
||||
def test_stable_diffusion_adapter_depth_sd_v15(self):
|
||||
adapter_model = "TencentARC/t2iadapter_depth_sd15v2"
|
||||
sd_model = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
sd_model = "Jiali/stable-diffusion-1.5"
|
||||
prompt = "desk"
|
||||
image_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/desk_depth.png"
|
||||
input_channels = 3
|
||||
@@ -636,7 +636,7 @@ class StableDiffusionAdapterPipelineSlowTests(unittest.TestCase):
|
||||
|
||||
def test_stable_diffusion_adapter_zoedepth_sd_v15(self):
|
||||
adapter_model = "TencentARC/t2iadapter_zoedepth_sd15v1"
|
||||
sd_model = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
sd_model = "Jiali/stable-diffusion-1.5"
|
||||
prompt = "motorcycle"
|
||||
image_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/motorcycle.png"
|
||||
input_channels = 3
|
||||
@@ -660,7 +660,7 @@ class StableDiffusionAdapterPipelineSlowTests(unittest.TestCase):
|
||||
|
||||
def test_stable_diffusion_adapter_canny_sd_v15(self):
|
||||
adapter_model = "TencentARC/t2iadapter_canny_sd15v2"
|
||||
sd_model = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
sd_model = "Jiali/stable-diffusion-1.5"
|
||||
prompt = "toy"
|
||||
image_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/toy_canny.png"
|
||||
input_channels = 1
|
||||
@@ -688,7 +688,7 @@ class StableDiffusionAdapterPipelineSlowTests(unittest.TestCase):
|
||||
|
||||
def test_stable_diffusion_adapter_sketch_sd15(self):
|
||||
adapter_model = "TencentARC/t2iadapter_sketch_sd15v2"
|
||||
sd_model = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
sd_model = "Jiali/stable-diffusion-1.5"
|
||||
prompt = "cat"
|
||||
image_url = (
|
||||
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/edge.png"
|
||||
|
||||
@@ -277,9 +277,7 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
def test_harm_safe_stable_diffusion(self):
|
||||
sd_pipe = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
|
||||
)
|
||||
sd_pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None)
|
||||
sd_pipe.scheduler = LMSDiscreteScheduler.from_config(sd_pipe.scheduler.config)
|
||||
sd_pipe = sd_pipe.to(torch_device)
|
||||
sd_pipe.set_progress_bar_config(disable=None)
|
||||
@@ -340,9 +338,7 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
|
||||
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
|
||||
|
||||
def test_nudity_safe_stable_diffusion(self):
|
||||
sd_pipe = StableDiffusionPipeline.from_pretrained(
|
||||
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
|
||||
)
|
||||
sd_pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None)
|
||||
sd_pipe.scheduler = LMSDiscreteScheduler.from_config(sd_pipe.scheduler.config)
|
||||
sd_pipe = sd_pipe.to(torch_device)
|
||||
sd_pipe.set_progress_bar_config(disable=None)
|
||||
@@ -396,7 +392,7 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
|
||||
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
|
||||
|
||||
def test_nudity_safetychecker_safe_stable_diffusion(self):
|
||||
sd_pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
|
||||
sd_pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
|
||||
sd_pipe = sd_pipe.to(torch_device)
|
||||
sd_pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@ from diffusers.utils.testing_utils import slow
|
||||
|
||||
PRETRAINED_MODEL_REPO_MAPPING = OrderedDict(
|
||||
[
|
||||
("stable-diffusion", "stable-diffusion-v1-5/stable-diffusion-v1-5"),
|
||||
("stable-diffusion", "Jiali/stable-diffusion-1.5"),
|
||||
("if", "DeepFloyd/IF-I-XL-v1.0"),
|
||||
("kandinsky", "kandinsky-community/kandinsky-2-1"),
|
||||
("kandinsky22", "kandinsky-community/kandinsky-2-2-decoder"),
|
||||
@@ -539,7 +539,7 @@ class AutoPipelineIntegrationTest(unittest.TestCase):
|
||||
|
||||
def test_controlnet(self):
|
||||
# test from_pretrained
|
||||
model_repo = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
model_repo = "Jiali/stable-diffusion-1.5"
|
||||
controlnet_repo = "lllyasviel/sd-controlnet-canny"
|
||||
|
||||
controlnet = ControlNetModel.from_pretrained(controlnet_repo, torch_dtype=torch.float16)
|
||||
|
||||
@@ -40,7 +40,7 @@ class TextToVideoZeroPipelineSlowTests(unittest.TestCase):
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
def test_full_model(self):
|
||||
model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
model_id = "Jiali/stable-diffusion-1.5"
|
||||
pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
|
||||
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
|
||||
generator = torch.Generator(device="cuda").manual_seed(0)
|
||||
|
||||
@@ -30,13 +30,11 @@ enable_full_determinism()
|
||||
@require_torch_gpu
|
||||
class StableDiffusionControlNetPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin):
|
||||
pipeline_class = StableDiffusionControlNetPipeline
|
||||
ckpt_path = (
|
||||
"https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
|
||||
)
|
||||
ckpt_path = "https://huggingface.co/Jiali/stable-diffusion-1.5/blob/main/v1-5-pruned-emaonly.safetensors"
|
||||
original_config = (
|
||||
"https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml"
|
||||
)
|
||||
repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
repo_id = "Jiali/stable-diffusion-1.5"
|
||||
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
|
||||
@@ -31,7 +31,7 @@ class StableDiffusionControlNetInpaintPipelineSingleFileSlowTests(unittest.TestC
|
||||
pipeline_class = StableDiffusionControlNetInpaintPipeline
|
||||
ckpt_path = "https://huggingface.co/botp/stable-diffusion-v1-5-inpainting/blob/main/sd-v1-5-inpainting.ckpt"
|
||||
original_config = "https://raw.githubusercontent.com/runwayml/stable-diffusion/main/configs/stable-diffusion/v1-inpainting-inference.yaml"
|
||||
repo_id = "stable-diffusion-v1-5/stable-diffusion-inpainting"
|
||||
repo_id = "botp/stable-diffusion-v1-5-inpainting"
|
||||
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
|
||||
@@ -29,13 +29,11 @@ enable_full_determinism()
|
||||
@require_torch_gpu
|
||||
class StableDiffusionControlNetPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin):
|
||||
pipeline_class = StableDiffusionControlNetPipeline
|
||||
ckpt_path = (
|
||||
"https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
|
||||
)
|
||||
ckpt_path = "https://huggingface.co/Jiali/stable-diffusion-1.5/blob/main/v1-5-pruned-emaonly.safetensors"
|
||||
original_config = (
|
||||
"https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml"
|
||||
)
|
||||
repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
repo_id = "Jiali/stable-diffusion-1.5"
|
||||
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
|
||||
@@ -23,13 +23,11 @@ enable_full_determinism()
|
||||
@require_torch_gpu
|
||||
class StableDiffusionImg2ImgPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin):
|
||||
pipeline_class = StableDiffusionImg2ImgPipeline
|
||||
ckpt_path = (
|
||||
"https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
|
||||
)
|
||||
ckpt_path = "https://huggingface.co/Jiali/stable-diffusion-1.5/blob/main/v1-5-pruned-emaonly.safetensors"
|
||||
original_config = (
|
||||
"https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml"
|
||||
)
|
||||
repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
repo_id = "Jiali/stable-diffusion-1.5"
|
||||
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
|
||||
@@ -63,7 +63,7 @@ class StableDiffusionInpaintPipelineSingleFileSlowTests(unittest.TestCase, SDSin
|
||||
|
||||
def test_single_file_loading_4_channel_unet(self):
|
||||
# Test loading single file inpaint with a 4 channel UNet
|
||||
ckpt_path = "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
|
||||
ckpt_path = "https://huggingface.co/Jiali/stable-diffusion-1.5/blob/main/v1-5-pruned-emaonly.safetensors"
|
||||
pipe = self.pipeline_class.from_single_file(ckpt_path)
|
||||
|
||||
assert pipe.unet.config.in_channels == 4
|
||||
|
||||
@@ -26,13 +26,11 @@ enable_full_determinism()
|
||||
@require_torch_gpu
|
||||
class StableDiffusionPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin):
|
||||
pipeline_class = StableDiffusionPipeline
|
||||
ckpt_path = (
|
||||
"https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
|
||||
)
|
||||
ckpt_path = "https://huggingface.co/Jiali/stable-diffusion-1.5/blob/main/v1-5-pruned-emaonly.safetensors"
|
||||
original_config = (
|
||||
"https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml"
|
||||
)
|
||||
repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
|
||||
repo_id = "Jiali/stable-diffusion-1.5"
|
||||
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
|
||||
Reference in New Issue
Block a user