Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 74ce30c11f |
@@ -1,7 +1,6 @@
|
||||
name: Benchmarking tests
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: "30 1 1,15 * *" # every 2 weeks on the 1st and the 15th of every month at 1:30 AM
|
||||
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
name: Check for Broken Links
|
||||
|
||||
on:
|
||||
repository_dispatch:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: "0 0 * * *"
|
||||
|
||||
jobs:
|
||||
check_for_broken_links:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Link Checker
|
||||
id: lychee
|
||||
uses: lycheeverse/lychee-action@v1
|
||||
with:
|
||||
args: './**/*.md'
|
||||
fail: true
|
||||
|
||||
- name: Create Issue From File
|
||||
if: env.lychee_exit_code != 0
|
||||
uses: diffusers/create-issue-from-file@v4
|
||||
with:
|
||||
title: Link Checker Report
|
||||
content-filepath: ./lychee/out.md
|
||||
labels: report, automated issue
|
||||
@@ -1,80 +0,0 @@
|
||||
name: Doctests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- doctest*
|
||||
repository_dispatch:
|
||||
schedule:
|
||||
- cron: "0 0 * * *"
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
RUN_SLOW: yes
|
||||
OMP_NUM_THREADS: 16
|
||||
MKL_NUM_THREADS: 16
|
||||
|
||||
jobs:
|
||||
run_doctests:
|
||||
runs-on: [single-gpu, nvidia-gpu, a10, ci]
|
||||
container:
|
||||
image: huggingface/diffusers-all-latest-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
|
||||
steps:
|
||||
- name: Checkout diffusers
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 2
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
uses: actions/checkout@v3
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Install dependencies
|
||||
run: python3 -m pip install -e .[quality,test,training]
|
||||
|
||||
- name: Environment
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Get doctest files
|
||||
run: |
|
||||
$(python3 -c 'from utils.tests_fetcher import get_all_doctest_files; to_test = get_all_doctest_files(); to_test = " ".join(to_test); fp = open("doc_tests.txt", "w"); fp.write(to_test); fp.close()')
|
||||
|
||||
- name: Run doctests
|
||||
env:
|
||||
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
|
||||
run: |
|
||||
python3 -m pytest -v --make-reports doc_tests_gpu --doctest-modules $(cat doc_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.md"
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat reports/doc_tests_gpu/failures_short.txt
|
||||
|
||||
- name: Test suite reports artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: doc_tests_gpu_test_reports
|
||||
path: reports/doc_tests_gpu
|
||||
|
||||
send_results:
|
||||
name: Send results to webhook
|
||||
runs-on: ubuntu-22.04
|
||||
if: always()
|
||||
needs: [run_doctests]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/download-artifact@v3
|
||||
- name: Send message to Slack
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_DOCS }}
|
||||
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_DOCS }}
|
||||
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
||||
run: |
|
||||
pip install slack_sdk
|
||||
python utils/notification_service_doc_tests.py
|
||||
@@ -36,7 +36,6 @@ repo-consistency:
|
||||
python utils/check_dummies.py
|
||||
python utils/check_repo.py
|
||||
python utils/check_inits.py
|
||||
python utils/check_doctest_list.py
|
||||
|
||||
# this target runs checks on all files
|
||||
|
||||
@@ -68,7 +67,6 @@ fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
|
||||
fix-copies:
|
||||
python utils/check_copies.py --fix_and_overwrite
|
||||
python utils/check_dummies.py --fix_and_overwrite
|
||||
python utils/check_doctest_list.py --fix_and_overwrite
|
||||
|
||||
# Run tests for the library
|
||||
|
||||
|
||||
@@ -141,7 +141,6 @@ class LCMLoRATextToImageBenchmark(TextToImageBenchmark):
|
||||
super().__init__(args)
|
||||
self.pipe.load_lora_weights(self.lora_id)
|
||||
self.pipe.fuse_lora()
|
||||
self.pipe.unload_lora_weights()
|
||||
self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
|
||||
|
||||
def get_result_filepath(self, args):
|
||||
@@ -236,35 +235,6 @@ class InpaintingBenchmark(ImageToImageBenchmark):
|
||||
)
|
||||
|
||||
|
||||
class IPAdapterTextToImageBenchmark(TextToImageBenchmark):
|
||||
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/load_neg_embed.png"
|
||||
image = load_image(url)
|
||||
|
||||
def __init__(self, args):
|
||||
pipe = self.pipeline_class.from_pretrained(args.ckpt, torch_dtype=torch.float16).to("cuda")
|
||||
pipe.load_ip_adapter(
|
||||
args.ip_adapter_id[0],
|
||||
subfolder="models" if "sdxl" not in args.ip_adapter_id[1] else "sdxl_models",
|
||||
weight_name=args.ip_adapter_id[1],
|
||||
)
|
||||
|
||||
if args.run_compile:
|
||||
pipe.unet.to(memory_format=torch.channels_last)
|
||||
print("Run torch compile")
|
||||
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
|
||||
|
||||
pipe.set_progress_bar_config(disable=True)
|
||||
self.pipe = pipe
|
||||
|
||||
def run_inference(self, pipe, args):
|
||||
_ = pipe(
|
||||
prompt=PROMPT,
|
||||
ip_adapter_image=self.image,
|
||||
num_inference_steps=args.num_inference_steps,
|
||||
num_images_per_prompt=args.batch_size,
|
||||
)
|
||||
|
||||
|
||||
class ControlNetBenchmark(TextToImageBenchmark):
|
||||
pipeline_class = StableDiffusionControlNetPipeline
|
||||
aux_network_class = ControlNetModel
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
|
||||
sys.path.append(".")
|
||||
from base_classes import IPAdapterTextToImageBenchmark # noqa: E402
|
||||
|
||||
|
||||
IP_ADAPTER_CKPTS = {
|
||||
"runwayml/stable-diffusion-v1-5": ("h94/IP-Adapter", "ip-adapter_sd15.bin"),
|
||||
"stabilityai/stable-diffusion-xl-base-1.0": ("h94/IP-Adapter", "ip-adapter_sdxl.bin"),
|
||||
}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--ckpt",
|
||||
type=str,
|
||||
default="runwayml/stable-diffusion-v1-5",
|
||||
choices=list(IP_ADAPTER_CKPTS.keys()),
|
||||
)
|
||||
parser.add_argument("--batch_size", type=int, default=1)
|
||||
parser.add_argument("--num_inference_steps", type=int, default=50)
|
||||
parser.add_argument("--model_cpu_offload", action="store_true")
|
||||
parser.add_argument("--run_compile", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
args.ip_adapter_id = IP_ADAPTER_CKPTS[args.ckpt]
|
||||
benchmark_pipe = IPAdapterTextToImageBenchmark(args)
|
||||
args.ckpt = f"{args.ckpt} (IP-Adapter)"
|
||||
benchmark_pipe.benchmark(args)
|
||||
@@ -72,7 +72,7 @@ def main():
|
||||
command += " --run_compile"
|
||||
run_command(command.split())
|
||||
|
||||
elif file in ["benchmark_sd_inpainting.py", "benchmark_ip_adapters.py"]:
|
||||
elif file == "benchmark_sd_inpainting.py":
|
||||
sdxl_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"
|
||||
command = f"python {file} --ckpt {sdxl_ckpt}"
|
||||
run_command(command.split())
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
|
||||
FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
|
||||
LABEL maintainer="Hugging Face"
|
||||
LABEL repository="diffusers"
|
||||
|
||||
@@ -24,9 +24,9 @@ ENV PATH="/opt/venv/bin:$PATH"
|
||||
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
|
||||
python3 -m uv pip install --no-cache-dir \
|
||||
torch \
|
||||
torchvision \
|
||||
torchaudio \
|
||||
torch==2.1.2 \
|
||||
torchvision==0.16.2 \
|
||||
torchaudio==2.1.2 \
|
||||
"onnxruntime-gpu>=1.13.1" \
|
||||
--extra-index-url https://download.pytorch.org/whl/cu117 && \
|
||||
python3 -m uv pip install --no-cache-dir \
|
||||
|
||||
@@ -169,7 +169,7 @@ list_adapters_component_wise
|
||||
|
||||
If you want to compile your model with `torch.compile` make sure to first fuse the LoRA weights into the base model and unload them.
|
||||
|
||||
```diff
|
||||
```py
|
||||
pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
|
||||
pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
|
||||
|
||||
@@ -178,16 +178,12 @@ pipe.set_adapters(["pixel", "toy"], adapter_weights=[0.5, 1.0])
|
||||
pipe.fuse_lora()
|
||||
pipe.unload_lora_weights()
|
||||
|
||||
+ pipe.unet.to(memory_format=torch.channels_last)
|
||||
+ pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
|
||||
pipe = torch.compile(pipe)
|
||||
|
||||
prompt = "toy_face of a hacker with a hoodie, pixel art"
|
||||
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> You can refer to the `torch.compile()` section [here](https://huggingface.co/docs/diffusers/main/en/optimization/torch2.0#torchcompile) and [here](https://huggingface.co/docs/diffusers/main/en/tutorials/fast_diffusion#torchcompile) for more elaborate examples.
|
||||
|
||||
## Fusing adapters into the model
|
||||
|
||||
You can use PEFT to easily fuse/unfuse multiple adapters directly into the model weights (both UNet and text encoder) using the [`~diffusers.loaders.LoraLoaderMixin.fuse_lora`] method, which can lead to a speed-up in inference and lower VRAM usage.
|
||||
|
||||
@@ -80,7 +80,8 @@ To do so, just specify `--train_text_encoder_ti` while launching training (for r
|
||||
Please keep the following points in mind:
|
||||
|
||||
* SDXL has two text encoders. So, we fine-tune both using LoRA.
|
||||
* When not fine-tuning the text encoders, we ALWAYS precompute the text embeddings to save memory.
|
||||
* When not fine-tuning the text encoders, we ALWAYS precompute the text embeddings to save memory.
|
||||
|
||||
|
||||
### 3D icon example
|
||||
|
||||
@@ -233,32 +234,6 @@ In ComfyUI we will load a LoRA and a textual embedding at the same time.
|
||||
|
||||
SDXL's VAE is known to suffer from numerical instability issues. This is why we also expose a CLI argument namely `--pretrained_vae_model_name_or_path` that lets you specify the location of a better VAE (such as [this one](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix)).
|
||||
|
||||
### DoRA training
|
||||
The advanced script now supports DoRA training too!
|
||||
> Proposed in [DoRA: Weight-Decomposed Low-Rank Adaptation](https://arxiv.org/abs/2402.09353),
|
||||
**DoRA** is very similar to LoRA, except it decomposes the pre-trained weight into two components, **magnitude** and **direction** and employs LoRA for _directional_ updates to efficiently minimize the number of trainable parameters.
|
||||
The authors found that by using DoRA, both the learning capacity and training stability of LoRA are enhanced without any additional overhead during inference.
|
||||
|
||||
> [!NOTE]
|
||||
> 💡DoRA training is still _experimental_
|
||||
> and is likely to require different hyperparameter values to perform best compared to a LoRA.
|
||||
> Specifically, we've noticed 2 differences to take into account in your training:
|
||||
> 1. **LoRA seems to converge faster than DoRA** (so a set of parameters that may lead to overfitting when training a LoRA may work well for a DoRA)
|
||||
> 2. **DoRA quality is superior to LoRA, especially at lower ranks**: the difference in quality between a DoRA of rank 8 and a LoRA of rank 8 appears to be more significant than when training at ranks of 32 or 64, for example.
|
||||
> This is also aligned with some of the quantitative analysis shown in the paper.
|
||||
|
||||
**Usage**
|
||||
1. To use DoRA you need to install `peft` from main:
|
||||
```bash
|
||||
pip install git+https://github.com/huggingface/peft.git
|
||||
```
|
||||
2. Enable DoRA training by adding this flag
|
||||
```bash
|
||||
--use_dora
|
||||
```
|
||||
**Inference**
|
||||
The inference is the same as if you train a regular LoRA 🤗
|
||||
|
||||
|
||||
### Tips and Tricks
|
||||
Check out [these recommended practices](https://huggingface.co/blog/sdxl_lora_advanced_script#additional-good-practices)
|
||||
|
||||
@@ -651,16 +651,6 @@ def parse_args(input_args=None):
|
||||
default=4,
|
||||
help=("The dimension of the LoRA update matrices."),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--use_dora",
|
||||
type=bool,
|
||||
action="store_true",
|
||||
default=False,
|
||||
help=(
|
||||
"Wether to train a DoRA as proposed in- DoRA: Weight-Decomposed Low-Rank Adaptation https://arxiv.org/abs/2402.09353. "
|
||||
"Note: to use DoRA you need to install peft from main, `pip install git+https://github.com/huggingface/peft.git`"
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cache_latents",
|
||||
action="store_true",
|
||||
@@ -1229,7 +1219,6 @@ def main(args):
|
||||
unet_lora_config = LoraConfig(
|
||||
r=args.rank,
|
||||
lora_alpha=args.rank,
|
||||
use_dora=args.use_dora,
|
||||
init_lora_weights="gaussian",
|
||||
target_modules=["to_k", "to_q", "to_v", "to_out.0"],
|
||||
)
|
||||
@@ -1241,7 +1230,6 @@ def main(args):
|
||||
text_lora_config = LoraConfig(
|
||||
r=args.rank,
|
||||
lora_alpha=args.rank,
|
||||
use_dora=args.use_dora,
|
||||
init_lora_weights="gaussian",
|
||||
target_modules=["q_proj", "k_proj", "v_proj", "out_proj"],
|
||||
)
|
||||
|
||||
@@ -661,16 +661,6 @@ def parse_args(input_args=None):
|
||||
default=4,
|
||||
help=("The dimension of the LoRA update matrices."),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--use_dora",
|
||||
type=bool,
|
||||
action="store_true",
|
||||
default=False,
|
||||
help=(
|
||||
"Wether to train a DoRA as proposed in- DoRA: Weight-Decomposed Low-Rank Adaptation https://arxiv.org/abs/2402.09353. "
|
||||
"Note: to use DoRA you need to install peft from main, `pip install git+https://github.com/huggingface/peft.git`"
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cache_latents",
|
||||
action="store_true",
|
||||
@@ -1333,7 +1323,6 @@ def main(args):
|
||||
unet_lora_config = LoraConfig(
|
||||
r=args.rank,
|
||||
lora_alpha=args.rank,
|
||||
use_dora=args.use_dora,
|
||||
init_lora_weights="gaussian",
|
||||
target_modules=["to_k", "to_q", "to_v", "to_out.0"],
|
||||
)
|
||||
@@ -1345,7 +1334,6 @@ def main(args):
|
||||
text_lora_config = LoraConfig(
|
||||
r=args.rank,
|
||||
lora_alpha=args.rank,
|
||||
use_dora=args.use_dora,
|
||||
init_lora_weights="gaussian",
|
||||
target_modules=["q_proj", "k_proj", "v_proj", "out_proj"],
|
||||
)
|
||||
|
||||
@@ -206,40 +206,3 @@ You can explore the results from a couple of our internal experiments by checkin
|
||||
## Running on a free-tier Colab Notebook
|
||||
|
||||
Check out [this notebook](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/SDXL_DreamBooth_LoRA_.ipynb).
|
||||
|
||||
## Conducting EDM-style training
|
||||
|
||||
It's now possible to perform EDM-style training as proposed in [Elucidating the Design Space of Diffusion-Based Generative Models](https://arxiv.org/abs/2206.00364).
|
||||
|
||||
For the SDXL model, simply set:
|
||||
|
||||
```diff
|
||||
+ --do_edm_style_training \
|
||||
```
|
||||
|
||||
Other SDXL-like models that use the EDM formulation, such as [playgroundai/playground-v2.5-1024px-aesthetic](https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic), can also be DreamBooth'd with the script. Below is an example command:
|
||||
|
||||
```bash
|
||||
accelerate launch train_dreambooth_lora_sdxl.py \
|
||||
--pretrained_model_name_or_path="playgroundai/playground-v2.5-1024px-aesthetic" \
|
||||
--instance_data_dir="dog" \
|
||||
--output_dir="dog-playground-lora" \
|
||||
--mixed_precision="fp16" \
|
||||
--instance_prompt="a photo of sks dog" \
|
||||
--resolution=1024 \
|
||||
--train_batch_size=1 \
|
||||
--gradient_accumulation_steps=4 \
|
||||
--learning_rate=1e-4 \
|
||||
--use_8bit_adam \
|
||||
--report_to="wandb" \
|
||||
--lr_scheduler="constant" \
|
||||
--lr_warmup_steps=0 \
|
||||
--max_train_steps=500 \
|
||||
--validation_prompt="A photo of sks dog in a bucket" \
|
||||
--validation_epochs=25 \
|
||||
--seed="0" \
|
||||
--push_to_hub
|
||||
```
|
||||
|
||||
> [!CAUTION]
|
||||
> Min-SNR gamma is not supported with the EDM-style training yet. When training with the PlaygroundAI model, it's recommended to not pass any "variant".
|
||||
|
||||
@@ -1,99 +0,0 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2024 HuggingFace Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import safetensors
|
||||
|
||||
|
||||
sys.path.append("..")
|
||||
from test_examples_utils import ExamplesTestsAccelerate, run_command # noqa: E402
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
logger = logging.getLogger()
|
||||
stream_handler = logging.StreamHandler(sys.stdout)
|
||||
logger.addHandler(stream_handler)
|
||||
|
||||
|
||||
class DreamBoothLoRASDXLWithEDM(ExamplesTestsAccelerate):
|
||||
def test_dreambooth_lora_sdxl_with_edm(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
test_args = f"""
|
||||
examples/dreambooth/train_dreambooth_lora_sdxl.py
|
||||
--pretrained_model_name_or_path hf-internal-testing/tiny-stable-diffusion-xl-pipe
|
||||
--do_edm_style_training
|
||||
--instance_data_dir docs/source/en/imgs
|
||||
--instance_prompt photo
|
||||
--resolution 64
|
||||
--train_batch_size 1
|
||||
--gradient_accumulation_steps 1
|
||||
--max_train_steps 2
|
||||
--learning_rate 5.0e-04
|
||||
--scale_lr
|
||||
--lr_scheduler constant
|
||||
--lr_warmup_steps 0
|
||||
--output_dir {tmpdir}
|
||||
""".split()
|
||||
|
||||
run_command(self._launch_args + test_args)
|
||||
# save_pretrained smoke test
|
||||
self.assertTrue(os.path.isfile(os.path.join(tmpdir, "pytorch_lora_weights.safetensors")))
|
||||
|
||||
# make sure the state_dict has the correct naming in the parameters.
|
||||
lora_state_dict = safetensors.torch.load_file(os.path.join(tmpdir, "pytorch_lora_weights.safetensors"))
|
||||
is_lora = all("lora" in k for k in lora_state_dict.keys())
|
||||
self.assertTrue(is_lora)
|
||||
|
||||
# when not training the text encoder, all the parameters in the state dict should start
|
||||
# with `"unet"` in their names.
|
||||
starts_with_unet = all(key.startswith("unet") for key in lora_state_dict.keys())
|
||||
self.assertTrue(starts_with_unet)
|
||||
|
||||
def test_dreambooth_lora_playground(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
test_args = f"""
|
||||
examples/dreambooth/train_dreambooth_lora_sdxl.py
|
||||
--pretrained_model_name_or_path hf-internal-testing/tiny-playground-v2-5-pipe
|
||||
--instance_data_dir docs/source/en/imgs
|
||||
--instance_prompt photo
|
||||
--resolution 64
|
||||
--train_batch_size 1
|
||||
--gradient_accumulation_steps 1
|
||||
--max_train_steps 2
|
||||
--learning_rate 5.0e-04
|
||||
--scale_lr
|
||||
--lr_scheduler constant
|
||||
--lr_warmup_steps 0
|
||||
--output_dir {tmpdir}
|
||||
""".split()
|
||||
|
||||
run_command(self._launch_args + test_args)
|
||||
# save_pretrained smoke test
|
||||
self.assertTrue(os.path.isfile(os.path.join(tmpdir, "pytorch_lora_weights.safetensors")))
|
||||
|
||||
# make sure the state_dict has the correct naming in the parameters.
|
||||
lora_state_dict = safetensors.torch.load_file(os.path.join(tmpdir, "pytorch_lora_weights.safetensors"))
|
||||
is_lora = all("lora" in k for k in lora_state_dict.keys())
|
||||
self.assertTrue(is_lora)
|
||||
|
||||
# when not training the text encoder, all the parameters in the state dict should start
|
||||
# with `"unet"` in their names.
|
||||
starts_with_unet = all(key.startswith("unet") for key in lora_state_dict.keys())
|
||||
self.assertTrue(starts_with_unet)
|
||||
@@ -14,10 +14,8 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
|
||||
import argparse
|
||||
import contextlib
|
||||
import gc
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
@@ -34,7 +32,7 @@ import transformers
|
||||
from accelerate import Accelerator
|
||||
from accelerate.logging import get_logger
|
||||
from accelerate.utils import DistributedDataParallelKwargs, ProjectConfiguration, set_seed
|
||||
from huggingface_hub import create_repo, hf_hub_download, upload_folder
|
||||
from huggingface_hub import create_repo, upload_folder
|
||||
from huggingface_hub.utils import insecure_hashlib
|
||||
from packaging import version
|
||||
from peft import LoraConfig, set_peft_model_state_dict
|
||||
@@ -52,8 +50,6 @@ from diffusers import (
|
||||
AutoencoderKL,
|
||||
DDPMScheduler,
|
||||
DPMSolverMultistepScheduler,
|
||||
EDMEulerScheduler,
|
||||
EulerDiscreteScheduler,
|
||||
StableDiffusionXLPipeline,
|
||||
UNet2DConditionModel,
|
||||
)
|
||||
@@ -80,20 +76,6 @@ check_min_version("0.27.0.dev0")
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def determine_scheduler_type(pretrained_model_name_or_path, revision):
|
||||
model_index_filename = "model_index.json"
|
||||
if os.path.isdir(pretrained_model_name_or_path):
|
||||
model_index = os.path.join(pretrained_model_name_or_path, model_index_filename)
|
||||
else:
|
||||
model_index = hf_hub_download(
|
||||
repo_id=pretrained_model_name_or_path, filename=model_index_filename, revision=revision
|
||||
)
|
||||
|
||||
with open(model_index, "r") as f:
|
||||
scheduler_type = json.load(f)["scheduler"][1]
|
||||
return scheduler_type
|
||||
|
||||
|
||||
def save_model_card(
|
||||
repo_id: str,
|
||||
images=None,
|
||||
@@ -113,7 +95,7 @@ def save_model_card(
|
||||
)
|
||||
|
||||
model_description = f"""
|
||||
# {'SDXL' if 'playgroundai' not in base_model else 'Playground'} LoRA DreamBooth - {repo_id}
|
||||
# SDXL LoRA DreamBooth - {repo_id}
|
||||
|
||||
<Gallery />
|
||||
|
||||
@@ -137,17 +119,11 @@ Weights for this model are available in Safetensors format.
|
||||
|
||||
[Download]({repo_id}/tree/main) them in the Files & versions tab.
|
||||
|
||||
"""
|
||||
if "playgroundai" in args.pretrained_model_name_or_path:
|
||||
model_description += """\n
|
||||
## License
|
||||
|
||||
Please adhere to the licensing terms as described [here](https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic/blob/main/LICENSE.md).
|
||||
"""
|
||||
model_card = load_or_create_model_card(
|
||||
repo_id_or_path=repo_id,
|
||||
from_training=True,
|
||||
license="openrail++" if "playgroundai" not in base_model else "playground-v2dot5-community",
|
||||
license="openrail++",
|
||||
base_model=base_model,
|
||||
prompt=instance_prompt,
|
||||
model_description=model_description,
|
||||
@@ -155,17 +131,15 @@ Please adhere to the licensing terms as described [here](https://huggingface.co/
|
||||
)
|
||||
tags = [
|
||||
"text-to-image",
|
||||
"stable-diffusion-xl",
|
||||
"stable-diffusion-xl-diffusers",
|
||||
"text-to-image",
|
||||
"diffusers",
|
||||
"lora",
|
||||
"template:sd-lora",
|
||||
]
|
||||
if "playgroundai" in base_model:
|
||||
tags.extend(["playground", "playground-diffusers"])
|
||||
else:
|
||||
tags.extend(["stable-diffusion-xl", "stable-diffusion-xl-diffusers"])
|
||||
|
||||
model_card = populate_model_card(model_card, tags=tags)
|
||||
|
||||
model_card.save(os.path.join(repo_folder, "README.md"))
|
||||
|
||||
|
||||
@@ -185,29 +159,23 @@ def log_validation(
|
||||
# We train on the simplified learning objective. If we were previously predicting a variance, we need the scheduler to ignore it
|
||||
scheduler_args = {}
|
||||
|
||||
if not args.do_edm_style_training:
|
||||
if "variance_type" in pipeline.scheduler.config:
|
||||
variance_type = pipeline.scheduler.config.variance_type
|
||||
if "variance_type" in pipeline.scheduler.config:
|
||||
variance_type = pipeline.scheduler.config.variance_type
|
||||
|
||||
if variance_type in ["learned", "learned_range"]:
|
||||
variance_type = "fixed_small"
|
||||
if variance_type in ["learned", "learned_range"]:
|
||||
variance_type = "fixed_small"
|
||||
|
||||
scheduler_args["variance_type"] = variance_type
|
||||
scheduler_args["variance_type"] = variance_type
|
||||
|
||||
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config, **scheduler_args)
|
||||
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config, **scheduler_args)
|
||||
|
||||
pipeline = pipeline.to(accelerator.device)
|
||||
pipeline.set_progress_bar_config(disable=True)
|
||||
|
||||
# run inference
|
||||
generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) if args.seed else None
|
||||
# Currently the context determination is a bit hand-wavy. We can improve it in the future if there's a better
|
||||
# way to condition it. Reference: https://github.com/huggingface/diffusers/pull/7126#issuecomment-1968523051
|
||||
inference_ctx = (
|
||||
contextlib.nullcontext() if "playgroundai" in args.pretrained_model_name_or_path else torch.cuda.amp.autocast()
|
||||
)
|
||||
|
||||
with inference_ctx:
|
||||
with torch.cuda.amp.autocast():
|
||||
images = [pipeline(**pipeline_args, generator=generator).images[0] for _ in range(args.num_validation_images)]
|
||||
|
||||
for tracker in accelerator.trackers:
|
||||
@@ -366,12 +334,6 @@ def parse_args(input_args=None):
|
||||
" `args.validation_prompt` multiple times: `args.num_validation_images`."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--do_edm_style_training",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Flag to conduct training using the EDM formulation as introduced in https://arxiv.org/abs/2206.00364.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--with_prior_preservation",
|
||||
default=False,
|
||||
@@ -943,9 +905,6 @@ def main(args):
|
||||
" Please use `huggingface-cli login` to authenticate with the Hub."
|
||||
)
|
||||
|
||||
if args.do_edm_style_training and args.snr_gamma is not None:
|
||||
raise ValueError("Min-SNR formulation is not supported when conducting EDM-style training.")
|
||||
|
||||
logging_dir = Path(args.output_dir, args.logging_dir)
|
||||
|
||||
accelerator_project_config = ProjectConfiguration(project_dir=args.output_dir, logging_dir=logging_dir)
|
||||
@@ -1059,19 +1018,7 @@ def main(args):
|
||||
)
|
||||
|
||||
# Load scheduler and models
|
||||
scheduler_type = determine_scheduler_type(args.pretrained_model_name_or_path, args.revision)
|
||||
if "EDM" in scheduler_type:
|
||||
args.do_edm_style_training = True
|
||||
noise_scheduler = EDMEulerScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
|
||||
logger.info("Performing EDM-style training!")
|
||||
elif args.do_edm_style_training:
|
||||
noise_scheduler = EulerDiscreteScheduler.from_pretrained(
|
||||
args.pretrained_model_name_or_path, subfolder="scheduler"
|
||||
)
|
||||
logger.info("Performing EDM-style training!")
|
||||
else:
|
||||
noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
|
||||
|
||||
noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
|
||||
text_encoder_one = text_encoder_cls_one.from_pretrained(
|
||||
args.pretrained_model_name_or_path, subfolder="text_encoder", revision=args.revision, variant=args.variant
|
||||
)
|
||||
@@ -1089,12 +1036,6 @@ def main(args):
|
||||
revision=args.revision,
|
||||
variant=args.variant,
|
||||
)
|
||||
latents_mean = latents_std = None
|
||||
if hasattr(vae.config, "latents_mean") and vae.config.latents_mean is not None:
|
||||
latents_mean = torch.tensor(vae.config.latents_mean).view(1, 4, 1, 1)
|
||||
if hasattr(vae.config, "latents_std") and vae.config.latents_std is not None:
|
||||
latents_std = torch.tensor(vae.config.latents_std).view(1, 4, 1, 1)
|
||||
|
||||
unet = UNet2DConditionModel.from_pretrained(
|
||||
args.pretrained_model_name_or_path, subfolder="unet", revision=args.revision, variant=args.variant
|
||||
)
|
||||
@@ -1237,7 +1178,7 @@ def main(args):
|
||||
_set_state_dict_into_text_encoder(lora_state_dict, prefix="text_encoder.", text_encoder=text_encoder_one_)
|
||||
|
||||
_set_state_dict_into_text_encoder(
|
||||
lora_state_dict, prefix="text_encoder_2.", text_encoder=text_encoder_two_
|
||||
lora_state_dict, prefix="text_encoder_2.", text_encoder=text_encoder_one_
|
||||
)
|
||||
|
||||
# Make sure the trainable params are in float32. This is again needed since the base models
|
||||
@@ -1492,12 +1433,7 @@ def main(args):
|
||||
# We need to initialize the trackers we use, and also store our configuration.
|
||||
# The trackers initializes automatically on the main process.
|
||||
if accelerator.is_main_process:
|
||||
tracker_name = (
|
||||
"dreambooth-lora-sd-xl"
|
||||
if "playgroundai" not in args.pretrained_model_name_or_path
|
||||
else "dreambooth-lora-playground"
|
||||
)
|
||||
accelerator.init_trackers(tracker_name, config=vars(args))
|
||||
accelerator.init_trackers("dreambooth-lora-sd-xl", config=vars(args))
|
||||
|
||||
# Train!
|
||||
total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
|
||||
@@ -1549,18 +1485,6 @@ def main(args):
|
||||
disable=not accelerator.is_local_main_process,
|
||||
)
|
||||
|
||||
def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
|
||||
sigmas = noise_scheduler.sigmas.to(device=accelerator.device, dtype=dtype)
|
||||
schedule_timesteps = noise_scheduler.timesteps.to(accelerator.device)
|
||||
timesteps = timesteps.to(accelerator.device)
|
||||
|
||||
step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
|
||||
|
||||
sigma = sigmas[step_indices].flatten()
|
||||
while len(sigma.shape) < n_dim:
|
||||
sigma = sigma.unsqueeze(-1)
|
||||
return sigma
|
||||
|
||||
for epoch in range(first_epoch, args.num_train_epochs):
|
||||
unet.train()
|
||||
if args.train_text_encoder:
|
||||
@@ -1588,46 +1512,22 @@ def main(args):
|
||||
|
||||
# Convert images to latent space
|
||||
model_input = vae.encode(pixel_values).latent_dist.sample()
|
||||
|
||||
if latents_mean is None and latents_std is None:
|
||||
model_input = model_input * vae.config.scaling_factor
|
||||
if args.pretrained_vae_model_name_or_path is None:
|
||||
model_input = model_input.to(weight_dtype)
|
||||
else:
|
||||
latents_mean = latents_mean.to(device=model_input.device, dtype=model_input.dtype)
|
||||
latents_std = latents_std.to(device=model_input.device, dtype=model_input.dtype)
|
||||
model_input = (model_input - latents_mean) * vae.config.scaling_factor / latents_std
|
||||
model_input = model_input.to(dtype=weight_dtype)
|
||||
model_input = model_input * vae.config.scaling_factor
|
||||
if args.pretrained_vae_model_name_or_path is None:
|
||||
model_input = model_input.to(weight_dtype)
|
||||
|
||||
# Sample noise that we'll add to the latents
|
||||
noise = torch.randn_like(model_input)
|
||||
bsz = model_input.shape[0]
|
||||
|
||||
# Sample a random timestep for each image
|
||||
if not args.do_edm_style_training:
|
||||
timesteps = torch.randint(
|
||||
0, noise_scheduler.config.num_train_timesteps, (bsz,), device=model_input.device
|
||||
)
|
||||
timesteps = timesteps.long()
|
||||
else:
|
||||
# in EDM formulation, the model is conditioned on the pre-conditioned noise levels
|
||||
# instead of discrete timesteps, so here we sample indices to get the noise levels
|
||||
# from `scheduler.timesteps`
|
||||
indices = torch.randint(0, noise_scheduler.config.num_train_timesteps, (bsz,))
|
||||
timesteps = noise_scheduler.timesteps[indices].to(device=model_input.device)
|
||||
timesteps = torch.randint(
|
||||
0, noise_scheduler.config.num_train_timesteps, (bsz,), device=model_input.device
|
||||
)
|
||||
timesteps = timesteps.long()
|
||||
|
||||
# Add noise to the model input according to the noise magnitude at each timestep
|
||||
# (this is the forward diffusion process)
|
||||
noisy_model_input = noise_scheduler.add_noise(model_input, noise, timesteps)
|
||||
# For EDM-style training, we first obtain the sigmas based on the continuous timesteps.
|
||||
# We then precondition the final model inputs based on these sigmas instead of the timesteps.
|
||||
# Follow: Section 5 of https://arxiv.org/abs/2206.00364.
|
||||
if args.do_edm_style_training:
|
||||
sigmas = get_sigmas(timesteps, len(noisy_model_input.shape), noisy_model_input.dtype)
|
||||
if "EDM" in scheduler_type:
|
||||
inp_noisy_latents = noise_scheduler.precondition_inputs(noisy_model_input, sigmas)
|
||||
else:
|
||||
inp_noisy_latents = noisy_model_input / ((sigmas**2 + 1) ** 0.5)
|
||||
|
||||
# time ids
|
||||
add_time_ids = torch.cat(
|
||||
@@ -1651,7 +1551,7 @@ def main(args):
|
||||
}
|
||||
prompt_embeds_input = prompt_embeds.repeat(elems_to_repeat_text_embeds, 1, 1)
|
||||
model_pred = unet(
|
||||
inp_noisy_latents if args.do_edm_style_training else noisy_model_input,
|
||||
noisy_model_input,
|
||||
timesteps,
|
||||
prompt_embeds_input,
|
||||
added_cond_kwargs=unet_added_conditions,
|
||||
@@ -1670,43 +1570,18 @@ def main(args):
|
||||
)
|
||||
prompt_embeds_input = prompt_embeds.repeat(elems_to_repeat_text_embeds, 1, 1)
|
||||
model_pred = unet(
|
||||
inp_noisy_latents if args.do_edm_style_training else noisy_model_input,
|
||||
noisy_model_input,
|
||||
timesteps,
|
||||
prompt_embeds_input,
|
||||
added_cond_kwargs=unet_added_conditions,
|
||||
return_dict=False,
|
||||
)[0]
|
||||
|
||||
weighting = None
|
||||
if args.do_edm_style_training:
|
||||
# Similar to the input preconditioning, the model predictions are also preconditioned
|
||||
# on noised model inputs (before preconditioning) and the sigmas.
|
||||
# Follow: Section 5 of https://arxiv.org/abs/2206.00364.
|
||||
if "EDM" in scheduler_type:
|
||||
model_pred = noise_scheduler.precondition_outputs(noisy_model_input, model_pred, sigmas)
|
||||
else:
|
||||
if noise_scheduler.config.prediction_type == "epsilon":
|
||||
model_pred = model_pred * (-sigmas) + noisy_model_input
|
||||
elif noise_scheduler.config.prediction_type == "v_prediction":
|
||||
model_pred = model_pred * (-sigmas / (sigmas**2 + 1) ** 0.5) + (
|
||||
noisy_model_input / (sigmas**2 + 1)
|
||||
)
|
||||
# We are not doing weighting here because it tends to result in numerical problems.
|
||||
# See: https://github.com/huggingface/diffusers/pull/7126#issuecomment-1968523051
|
||||
# There might be other alternatives for weighting as well:
|
||||
# https://github.com/huggingface/diffusers/pull/7126#discussion_r1505404686
|
||||
if "EDM" not in scheduler_type:
|
||||
weighting = (sigmas**-2.0).float()
|
||||
|
||||
# Get the target for loss depending on the prediction type
|
||||
if noise_scheduler.config.prediction_type == "epsilon":
|
||||
target = model_input if args.do_edm_style_training else noise
|
||||
target = noise
|
||||
elif noise_scheduler.config.prediction_type == "v_prediction":
|
||||
target = (
|
||||
model_input
|
||||
if args.do_edm_style_training
|
||||
else noise_scheduler.get_velocity(model_input, noise, timesteps)
|
||||
)
|
||||
target = noise_scheduler.get_velocity(model_input, noise, timesteps)
|
||||
else:
|
||||
raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}")
|
||||
|
||||
@@ -1716,28 +1591,10 @@ def main(args):
|
||||
target, target_prior = torch.chunk(target, 2, dim=0)
|
||||
|
||||
# Compute prior loss
|
||||
if weighting is not None:
|
||||
prior_loss = torch.mean(
|
||||
(weighting.float() * (model_pred_prior.float() - target_prior.float()) ** 2).reshape(
|
||||
target_prior.shape[0], -1
|
||||
),
|
||||
1,
|
||||
)
|
||||
prior_loss = prior_loss.mean()
|
||||
else:
|
||||
prior_loss = F.mse_loss(model_pred_prior.float(), target_prior.float(), reduction="mean")
|
||||
prior_loss = F.mse_loss(model_pred_prior.float(), target_prior.float(), reduction="mean")
|
||||
|
||||
if args.snr_gamma is None:
|
||||
if weighting is not None:
|
||||
loss = torch.mean(
|
||||
(weighting.float() * (model_pred.float() - target.float()) ** 2).reshape(
|
||||
target.shape[0], -1
|
||||
),
|
||||
1,
|
||||
)
|
||||
loss = loss.mean()
|
||||
else:
|
||||
loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean")
|
||||
loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean")
|
||||
else:
|
||||
# Compute loss-weights as per Section 3.4 of https://arxiv.org/abs/2303.09556.
|
||||
# Since we predict the noise instead of x_0, the original formulation is slightly changed.
|
||||
@@ -1839,6 +1696,7 @@ def main(args):
|
||||
variant=args.variant,
|
||||
torch_dtype=weight_dtype,
|
||||
)
|
||||
|
||||
pipeline_args = {"prompt": args.validation_prompt}
|
||||
|
||||
images = log_validation(
|
||||
|
||||
@@ -35,7 +35,7 @@ import transformers
|
||||
from accelerate import Accelerator
|
||||
from accelerate.logging import get_logger
|
||||
from accelerate.utils import ProjectConfiguration, set_seed
|
||||
from datasets import concatenate_datasets, load_dataset
|
||||
from datasets import load_dataset
|
||||
from huggingface_hub import create_repo, upload_folder
|
||||
from packaging import version
|
||||
from torchvision import transforms
|
||||
@@ -895,20 +895,14 @@ def main(args):
|
||||
# fingerprint used by the cache for the other processes to load the result
|
||||
# details: https://github.com/huggingface/diffusers/pull/4038#discussion_r1266078401
|
||||
new_fingerprint = Hasher.hash(args)
|
||||
new_fingerprint_for_vae = Hasher.hash(vae_path)
|
||||
train_dataset_with_embeddings = train_dataset.map(
|
||||
compute_embeddings_fn, batched=True, new_fingerprint=new_fingerprint
|
||||
)
|
||||
train_dataset_with_vae = train_dataset.map(
|
||||
new_fingerprint_for_vae = Hasher.hash("vae")
|
||||
train_dataset = train_dataset.map(compute_embeddings_fn, batched=True, new_fingerprint=new_fingerprint)
|
||||
train_dataset = train_dataset.map(
|
||||
compute_vae_encodings_fn,
|
||||
batched=True,
|
||||
batch_size=args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps,
|
||||
new_fingerprint=new_fingerprint_for_vae,
|
||||
)
|
||||
precomputed_dataset = concatenate_datasets(
|
||||
[train_dataset_with_embeddings, train_dataset_with_vae.remove_columns(["image", "text"])], axis=1
|
||||
)
|
||||
precomputed_dataset = precomputed_dataset.with_transform(preprocess_train)
|
||||
|
||||
del text_encoders, tokenizers, vae
|
||||
gc.collect()
|
||||
@@ -931,7 +925,7 @@ def main(args):
|
||||
|
||||
# DataLoaders creation:
|
||||
train_dataloader = torch.utils.data.DataLoader(
|
||||
precomputed_dataset,
|
||||
train_dataset,
|
||||
shuffle=True,
|
||||
collate_fn=collate_fn,
|
||||
batch_size=args.train_batch_size,
|
||||
@@ -982,7 +976,7 @@ def main(args):
|
||||
total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
|
||||
|
||||
logger.info("***** Running training *****")
|
||||
logger.info(f" Num examples = {len(precomputed_dataset)}")
|
||||
logger.info(f" Num examples = {len(train_dataset)}")
|
||||
logger.info(f" Num Epochs = {args.num_train_epochs}")
|
||||
logger.info(f" Instantaneous batch size per device = {args.train_batch_size}")
|
||||
logger.info(f" Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
|
||||
|
||||
@@ -2,6 +2,7 @@ accelerate>=0.16.0
|
||||
torchvision
|
||||
transformers>=4.25.1
|
||||
wandb
|
||||
huggingface-cli
|
||||
bitsandbytes
|
||||
deepspeed
|
||||
peft>=0.6.0
|
||||
|
||||
@@ -25,7 +25,3 @@ skip-magic-trailing-comma = false
|
||||
|
||||
# Like Black, automatically detect the appropriate line ending.
|
||||
line-ending = "auto"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
doctest_optionflags="NUMBER NORMALIZE_WHITESPACE ELLIPSIS"
|
||||
doctest_glob="**/*.md"
|
||||
@@ -9,11 +9,11 @@ from diffusers import AutoencoderKL, DPMSolverMultistepScheduler, PixArtAlphaPip
|
||||
|
||||
ckpt_id = "PixArt-alpha/PixArt-alpha"
|
||||
# https://github.com/PixArt-alpha/PixArt-alpha/blob/0f55e922376d8b797edd44d25d0e7464b260dcab/scripts/inference.py#L125
|
||||
interpolation_scale = {256: 0.5, 512: 1, 1024: 2}
|
||||
interpolation_scale = {512: 1, 1024: 2}
|
||||
|
||||
|
||||
def main(args):
|
||||
all_state_dict = torch.load(args.orig_ckpt_path, map_location="cpu")
|
||||
all_state_dict = torch.load(args.orig_ckpt_path)
|
||||
state_dict = all_state_dict.pop("state_dict")
|
||||
converted_state_dict = {}
|
||||
|
||||
@@ -22,6 +22,7 @@ def main(args):
|
||||
converted_state_dict["pos_embed.proj.bias"] = state_dict.pop("x_embedder.proj.bias")
|
||||
|
||||
# Caption projection.
|
||||
converted_state_dict["caption_projection.y_embedding"] = state_dict.pop("y_embedder.y_embedding")
|
||||
converted_state_dict["caption_projection.linear_1.weight"] = state_dict.pop("y_embedder.y_proj.fc1.weight")
|
||||
converted_state_dict["caption_projection.linear_1.bias"] = state_dict.pop("y_embedder.y_proj.fc1.bias")
|
||||
converted_state_dict["caption_projection.linear_2.weight"] = state_dict.pop("y_embedder.y_proj.fc2.weight")
|
||||
@@ -154,7 +155,6 @@ def main(args):
|
||||
|
||||
assert transformer.pos_embed.pos_embed is not None
|
||||
state_dict.pop("pos_embed")
|
||||
state_dict.pop("y_embedder.y_embedding")
|
||||
assert len(state_dict) == 0, f"State dict is not empty, {state_dict.keys()}"
|
||||
|
||||
num_model_params = sum(p.numel() for p in transformer.parameters())
|
||||
@@ -187,7 +187,7 @@ if __name__ == "__main__":
|
||||
"--image_size",
|
||||
default=1024,
|
||||
type=int,
|
||||
choices=[256, 512, 1024],
|
||||
choices=[512, 1024],
|
||||
required=False,
|
||||
help="Image size of pretrained model, either 512 or 1024.",
|
||||
)
|
||||
|
||||
@@ -1,185 +0,0 @@
|
||||
import doctest
|
||||
import inspect
|
||||
import os
|
||||
import re
|
||||
from typing import Iterable
|
||||
|
||||
from .utils import is_pytest_available
|
||||
|
||||
|
||||
if is_pytest_available():
|
||||
from _pytest.doctest import (
|
||||
Module,
|
||||
_get_checker,
|
||||
_get_continue_on_failure,
|
||||
_get_runner,
|
||||
_is_mocked,
|
||||
_patch_unwrap_mock_aware,
|
||||
get_optionflags,
|
||||
import_path,
|
||||
)
|
||||
from _pytest.outcomes import skip
|
||||
from pytest import DoctestItem
|
||||
else:
|
||||
Module = object
|
||||
DoctestItem = object
|
||||
|
||||
"""
|
||||
The following contains utils to run the documentation tests without having to overwrite any files.
|
||||
|
||||
The `preprocess_string` function adds `# doctest: +IGNORE_RESULT` markers on the fly anywhere a `load_dataset` call is
|
||||
made as a print would otherwise fail the corresponding line.
|
||||
|
||||
To skip cuda tests, make sure to call `SKIP_CUDA_DOCTEST=1 pytest --doctest-modules <path_to_files_to_test>`
|
||||
"""
|
||||
|
||||
|
||||
def preprocess_string(string, skip_cuda_tests):
    """Prepare a docstring or a `.md` file to be run by doctest.

    The argument `string` would be the whole file content if it is a `.md` file. For a python file, it would be one of
    its docstring. In each case, it may contain multiple python code examples.

    Args:
        string: Text to preprocess.
        skip_cuda_tests: When truthy, any piece that looks like doctest code (contains `>>>` or `...`) and matches
            the cuda heuristic (`cuda`, `to(0)`, `device=0`) disables the whole `string`.

    Returns:
        `string` with ` # doctest: +IGNORE_RESULT` appended to `>>> ... load_dataset(...)` lines, or an empty string
        when `skip_cuda_tests` is set and cuda usage was detected, so that no doctest is run for `string`.
    """
    # `(?:(?!```).)*?` lazily consumes any character that does not start a closing fence. `re.DOTALL` lets `.` span
    # newlines. The previous `[^]` spelling is not valid in Python's `re` (unterminated character set).
    codeblock_pattern = r"(```(?:python|py)\s*\n\s*>>> )((?:(?!```).)*?```)"
    codeblocks = re.split(re.compile(codeblock_pattern, flags=re.MULTILINE | re.DOTALL), string)

    is_cuda_found = False
    for i, codeblock in enumerate(codeblocks):
        if "load_dataset(" in codeblock and "# doctest: +IGNORE_RESULT" not in codeblock:
            codeblocks[i] = re.sub(r"(>>> .*load_dataset\(.*)", r"\1 # doctest: +IGNORE_RESULT", codeblock)
        if (
            skip_cuda_tests
            and (">>>" in codeblock or "..." in codeblock)
            and re.search(r"cuda|to\(0\)|device=0", codeblock)
        ):
            is_cuda_found = True
            break

    # An empty string means "nothing to test" for the doctest parser.
    return "" if is_cuda_found else "".join(codeblocks)
|
||||
|
||||
|
||||
class HfDocTestParser(doctest.DocTestParser):
    """
    Overwrites the DocTestParser from doctest to properly parse the codeblocks that are formatted with black. This
    means that there are no extra lines at the end of our snippets. The `# doctest: +IGNORE_RESULT` marker is also
    added anywhere a `load_dataset` call is made as a print would otherwise fail the corresponding line.

    Tests involving cuda are skipped based on a naive pattern that should be updated if it is not enough.
    """

    # This regular expression is used to find doctest examples in a
    # string. It defines three groups: `source` is the source code
    # (including leading indentation and prompts); `indent` is the
    # indentation of the first (PS1) line of the source code; and
    # `want` is the expected output (including leading indentation).
    # fmt: off
    _EXAMPLE_RE = re.compile(r'''
        # Source consists of a PS1 line followed by zero or more PS2 lines.
        (?P<source>
            (?:^(?P<indent> [ ]*) >>>    .*)    # PS1 line
            (?:\n           [ ]*  \.\.\. .*)*)  # PS2 lines
        \n?
        # Want consists of any non-blank lines that do not start with PS1.
        (?P<want> (?:(?![ ]*$)    # Not a blank line
             (?![ ]*>>>)          # Not a line starting with PS1
             # !!!!!!!!!!! HF Specific !!!!!!!!!!!
             (?:(?!```).)*        # Match any character except '`' until a '```' is found (this is specific to HF because black removes the last line)
             # !!!!!!!!!!! HF Specific !!!!!!!!!!!
             (?:\n|$)  # Match a new line or end of string
          )*)
        ''', re.MULTILINE | re.VERBOSE
    )
    # fmt: on

    # !!!!!!!!!!! HF Specific !!!!!!!!!!!
    # Read once, when the class body is executed. Parsed as a flag rather than by plain truthiness so that
    # `SKIP_CUDA_DOCTEST=0` (or `false`/`no`/`off`, or an empty value) does not enable skipping.
    skip_cuda_tests: bool = os.environ.get("SKIP_CUDA_DOCTEST", "").strip().lower() not in (
        "",
        "0",
        "false",
        "no",
        "off",
    )
    # !!!!!!!!!!! HF Specific !!!!!!!!!!!

    def parse(self, string, name="<string>"):
        """
        Overwrites the `parse` method to incorporate a skip for CUDA tests, and remove logs and dataset prints before
        calling `super().parse`
        """
        string = preprocess_string(string, self.skip_cuda_tests)
        return super().parse(string, name)
|
||||
|
||||
|
||||
class HfDoctestModule(Module):
    """
    Variant of pytest's `DoctestModule` whose collection step builds its doctest finder with `HfDocTestParser`, so
    that the HF-specific parsing rules apply when tests are discovered.
    """

    def collect(self) -> Iterable["DoctestItem"]:
        """Import the module at `self.path` and yield one `DoctestItem` per doctest that has examples."""

        class MockAwareDocTestFinder(doctest.DocTestFinder):
            """A hackish doctest finder that overrides stdlib internals to fix a stdlib bug.

            https://github.com/pytest-dev/pytest/issues/3456 https://bugs.python.org/issue25532
            """

            def _find_lineno(self, obj, source_lines):
                """Resolve properties and wrapped callables before asking the stdlib for a line number.

                Doctest does not take `@property` into account (https://bugs.python.org/issue17446), and wrapped
                objects must be unwrapped for the correct line number to be returned (#8796).
                """
                target = obj
                if isinstance(target, property):
                    target = getattr(target, "fget", target)
                if hasattr(target, "__wrapped__"):
                    # Get the main obj in case of it being wrapped
                    target = inspect.unwrap(target)
                # Type ignored because this is a private function.
                return super()._find_lineno(target, source_lines)  # type:ignore[misc]

            def _find(self, tests, obj, name, module, source_lines, globs, seen) -> None:
                if not _is_mocked(obj):
                    with _patch_unwrap_mock_aware():
                        # Type ignored because this is a private function.
                        super()._find(tests, obj, name, module, source_lines, globs, seen)  # type:ignore[misc]

        config = self.config
        if self.path.name == "conftest.py":
            module = config.pluginmanager._importconftest(
                self.path,
                config.getoption("importmode"),
                rootpath=config.rootpath,
            )
        else:
            try:
                module = import_path(
                    self.path,
                    root=config.rootpath,
                    mode=config.getoption("importmode"),
                )
            except ImportError:
                if not config.getvalue("doctest_ignore_import_errors"):
                    raise
                skip("unable to import module %r" % self.path)

        # !!!!!!!!!!! HF Specific !!!!!!!!!!!
        doctest_finder = MockAwareDocTestFinder(parser=HfDocTestParser())
        # !!!!!!!!!!! HF Specific !!!!!!!!!!!
        doctest_runner = _get_runner(
            verbose=False,
            optionflags=get_optionflags(self),
            checker=_get_checker(),
            continue_on_failure=_get_continue_on_failure(config),
        )
        for found in doctest_finder.find(module, module.__name__):
            if not found.examples:  # skip empty doctests and cuda
                continue
            yield DoctestItem.from_parent(self, name=found.name, runner=doctest_runner, dtest=found)
|
||||
@@ -97,7 +97,6 @@ class Transformer2DModel(ModelMixin, ConfigMixin):
|
||||
norm_eps: float = 1e-5,
|
||||
attention_type: str = "default",
|
||||
caption_channels: int = None,
|
||||
interpolation_scale: float = None,
|
||||
):
|
||||
super().__init__()
|
||||
self.use_linear_projection = use_linear_projection
|
||||
@@ -169,9 +168,8 @@ class Transformer2DModel(ModelMixin, ConfigMixin):
|
||||
self.width = sample_size
|
||||
|
||||
self.patch_size = patch_size
|
||||
interpolation_scale = (
|
||||
interpolation_scale if interpolation_scale is not None else max(self.config.sample_size // 64, 1)
|
||||
)
|
||||
interpolation_scale = self.config.sample_size // 64 # => 64 (= 512 pixart) has interpolation scale 1
|
||||
interpolation_scale = max(interpolation_scale, 1)
|
||||
self.pos_embed = PatchEmbed(
|
||||
height=sample_size,
|
||||
width=sample_size,
|
||||
|
||||
@@ -400,22 +400,15 @@ class AnimateDiffPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -516,9 +509,9 @@ class AnimateDiffPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
|
||||
|
||||
@@ -478,22 +478,15 @@ class AnimateDiffVideoToVideoPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -596,9 +589,9 @@ class AnimateDiffVideoToVideoPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
def get_timesteps(self, num_inference_steps, timesteps, strength, device):
|
||||
|
||||
@@ -510,22 +510,15 @@ class StableDiffusionControlNetPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -733,9 +726,9 @@ class StableDiffusionControlNetPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
def check_image(self, image, prompt, prompt_embeds):
|
||||
|
||||
@@ -503,22 +503,15 @@ class StableDiffusionControlNetImg2ImgPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -720,9 +713,9 @@ class StableDiffusionControlNetImg2ImgPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
|
||||
|
||||
@@ -628,22 +628,15 @@ class StableDiffusionControlNetInpaintPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -878,9 +871,9 @@ class StableDiffusionControlNetInpaintPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
|
||||
|
||||
@@ -537,22 +537,15 @@ class StableDiffusionXLControlNetInpaintPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -824,9 +817,9 @@ class StableDiffusionXLControlNetInpaintPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
def prepare_control_image(
|
||||
|
||||
@@ -515,22 +515,15 @@ class StableDiffusionXLControlNetPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -737,9 +730,9 @@ class StableDiffusionXLControlNetPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
|
||||
|
||||
@@ -567,22 +567,15 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -801,9 +794,9 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.controlnet.pipeline_controlnet_sd_xl.StableDiffusionXLControlNetPipeline.check_image
|
||||
|
||||
+5
-12
@@ -453,22 +453,15 @@ class LatentConsistencyModelImg2ImgPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -654,9 +647,9 @@ class LatentConsistencyModelImg2ImgPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
@property
|
||||
|
||||
+5
-12
@@ -437,22 +437,15 @@ class LatentConsistencyModelPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -586,9 +579,9 @@ class LatentConsistencyModelPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
@property
|
||||
|
||||
@@ -582,9 +582,9 @@ class PIAPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_ip_adapter_image_embeds
|
||||
@@ -619,22 +619,15 @@ class PIAPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
|
||||
@@ -133,42 +133,6 @@ ASPECT_RATIO_512_BIN = {
|
||||
"4.0": [1024.0, 256.0],
|
||||
}
|
||||
|
||||
ASPECT_RATIO_256_BIN = {
|
||||
"0.25": [128.0, 512.0],
|
||||
"0.28": [128.0, 464.0],
|
||||
"0.32": [144.0, 448.0],
|
||||
"0.33": [144.0, 432.0],
|
||||
"0.35": [144.0, 416.0],
|
||||
"0.4": [160.0, 400.0],
|
||||
"0.42": [160.0, 384.0],
|
||||
"0.48": [176.0, 368.0],
|
||||
"0.5": [176.0, 352.0],
|
||||
"0.52": [176.0, 336.0],
|
||||
"0.57": [192.0, 336.0],
|
||||
"0.6": [192.0, 320.0],
|
||||
"0.68": [208.0, 304.0],
|
||||
"0.72": [208.0, 288.0],
|
||||
"0.78": [224.0, 288.0],
|
||||
"0.82": [224.0, 272.0],
|
||||
"0.88": [240.0, 272.0],
|
||||
"0.94": [240.0, 256.0],
|
||||
"1.0": [256.0, 256.0],
|
||||
"1.07": [256.0, 240.0],
|
||||
"1.13": [272.0, 240.0],
|
||||
"1.21": [272.0, 224.0],
|
||||
"1.29": [288.0, 224.0],
|
||||
"1.38": [288.0, 208.0],
|
||||
"1.46": [304.0, 208.0],
|
||||
"1.67": [320.0, 192.0],
|
||||
"1.75": [336.0, 192.0],
|
||||
"2.0": [352.0, 176.0],
|
||||
"2.09": [368.0, 176.0],
|
||||
"2.4": [384.0, 160.0],
|
||||
"2.5": [400.0, 160.0],
|
||||
"3.0": [432.0, 144.0],
|
||||
"4.0": [512.0, 128.0],
|
||||
}
|
||||
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.retrieve_timesteps
|
||||
def retrieve_timesteps(
|
||||
@@ -296,7 +260,6 @@ class PixArtAlphaPipeline(DiffusionPipeline):
|
||||
prompt_attention_mask: Optional[torch.FloatTensor] = None,
|
||||
negative_prompt_attention_mask: Optional[torch.FloatTensor] = None,
|
||||
clean_caption: bool = False,
|
||||
max_sequence_length: int = 120,
|
||||
**kwargs,
|
||||
):
|
||||
r"""
|
||||
@@ -321,9 +284,8 @@ class PixArtAlphaPipeline(DiffusionPipeline):
|
||||
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
||||
Pre-generated negative text embeddings. For PixArt-Alpha, it's should be the embeddings of the ""
|
||||
string.
|
||||
clean_caption (`bool`, defaults to `False`):
|
||||
clean_caption (bool, defaults to `False`):
|
||||
If `True`, the function will preprocess and clean the provided caption before encoding.
|
||||
max_sequence_length (`int`, defaults to 120): Maximum sequence length to use for the prompt.
|
||||
"""
|
||||
|
||||
if "mask_feature" in kwargs:
|
||||
@@ -341,7 +303,7 @@ class PixArtAlphaPipeline(DiffusionPipeline):
|
||||
batch_size = prompt_embeds.shape[0]
|
||||
|
||||
# See Section 3.1. of the paper.
|
||||
max_length = max_sequence_length
|
||||
max_length = 120
|
||||
|
||||
if prompt_embeds is None:
|
||||
prompt = self._text_preprocessing(prompt, clean_caption=clean_caption)
|
||||
@@ -726,7 +688,6 @@ class PixArtAlphaPipeline(DiffusionPipeline):
|
||||
callback_steps: int = 1,
|
||||
clean_caption: bool = True,
|
||||
use_resolution_binning: bool = True,
|
||||
max_sequence_length: int = 120,
|
||||
**kwargs,
|
||||
) -> Union[ImagePipelineOutput, Tuple]:
|
||||
"""
|
||||
@@ -796,7 +757,6 @@ class PixArtAlphaPipeline(DiffusionPipeline):
|
||||
If set to `True`, the requested height and width are first mapped to the closest resolutions using
|
||||
`ASPECT_RATIO_1024_BIN`. After the produced latents are decoded into images, they are resized back to
|
||||
the requested resolution. Useful for generating non-square images.
|
||||
max_sequence_length (`int` defaults to 120): Maximum sequence length to use with the `prompt`.
|
||||
|
||||
Examples:
|
||||
|
||||
@@ -812,14 +772,9 @@ class PixArtAlphaPipeline(DiffusionPipeline):
|
||||
height = height or self.transformer.config.sample_size * self.vae_scale_factor
|
||||
width = width or self.transformer.config.sample_size * self.vae_scale_factor
|
||||
if use_resolution_binning:
|
||||
if self.transformer.config.sample_size == 128:
|
||||
aspect_ratio_bin = ASPECT_RATIO_1024_BIN
|
||||
elif self.transformer.config.sample_size == 64:
|
||||
aspect_ratio_bin = ASPECT_RATIO_512_BIN
|
||||
elif self.transformer.config.sample_size == 32:
|
||||
aspect_ratio_bin = ASPECT_RATIO_256_BIN
|
||||
else:
|
||||
raise ValueError("Invalid sample size")
|
||||
aspect_ratio_bin = (
|
||||
ASPECT_RATIO_1024_BIN if self.transformer.config.sample_size == 128 else ASPECT_RATIO_512_BIN
|
||||
)
|
||||
orig_height, orig_width = height, width
|
||||
height, width = self.classify_height_width_bin(height, width, ratios=aspect_ratio_bin)
|
||||
|
||||
@@ -867,7 +822,6 @@ class PixArtAlphaPipeline(DiffusionPipeline):
|
||||
prompt_attention_mask=prompt_attention_mask,
|
||||
negative_prompt_attention_mask=negative_prompt_attention_mask,
|
||||
clean_caption=clean_caption,
|
||||
max_sequence_length=max_sequence_length,
|
||||
)
|
||||
if do_classifier_free_guidance:
|
||||
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
|
||||
|
||||
@@ -520,22 +520,15 @@ class StableDiffusionPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -646,9 +639,9 @@ class StableDiffusionPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
||||
|
||||
@@ -564,22 +564,15 @@ class StableDiffusionImg2ImgPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -692,9 +685,9 @@ class StableDiffusionImg2ImgPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
def get_timesteps(self, num_inference_steps, strength, device):
|
||||
|
||||
@@ -636,22 +636,15 @@ class StableDiffusionInpaintPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -774,9 +767,9 @@ class StableDiffusionInpaintPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
def prepare_latents(
|
||||
|
||||
+24
-203
@@ -59,66 +59,6 @@ EXAMPLE_DOC_STRING = """
|
||||
"""
|
||||
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
|
||||
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
||||
"""
|
||||
Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
|
||||
Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
|
||||
"""
|
||||
std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
|
||||
std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
|
||||
# rescale the results from guidance (fixes overexposure)
|
||||
noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
|
||||
# mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images
|
||||
noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg
|
||||
return noise_cfg
|
||||
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.retrieve_timesteps
|
||||
def retrieve_timesteps(
|
||||
scheduler,
|
||||
num_inference_steps: Optional[int] = None,
|
||||
device: Optional[Union[str, torch.device]] = None,
|
||||
timesteps: Optional[List[int]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
|
||||
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
|
||||
|
||||
Args:
|
||||
scheduler (`SchedulerMixin`):
|
||||
The scheduler to get timesteps from.
|
||||
num_inference_steps (`int`):
|
||||
The number of diffusion steps used when generating samples with a pre-trained model. If used,
|
||||
`timesteps` must be `None`.
|
||||
device (`str` or `torch.device`, *optional*):
|
||||
The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
|
||||
timesteps (`List[int]`, *optional*):
|
||||
Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
|
||||
timestep spacing strategy of the scheduler is used. If `timesteps` is passed, `num_inference_steps`
|
||||
must be `None`.
|
||||
|
||||
Returns:
|
||||
`Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
|
||||
second element is the number of inference steps.
|
||||
"""
|
||||
if timesteps is not None:
|
||||
accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
|
||||
if not accepts_timesteps:
|
||||
raise ValueError(
|
||||
f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
|
||||
f" timestep schedules. Please check whether you are using the correct scheduler."
|
||||
)
|
||||
scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
|
||||
timesteps = scheduler.timesteps
|
||||
num_inference_steps = len(timesteps)
|
||||
else:
|
||||
scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
|
||||
timesteps = scheduler.timesteps
|
||||
return timesteps, num_inference_steps
|
||||
|
||||
|
||||
@dataclass
|
||||
class LDM3DPipelineOutput(BaseOutput):
|
||||
"""
|
||||
@@ -185,7 +125,6 @@ class StableDiffusionLDM3DPipeline(
|
||||
model_cpu_offload_seq = "text_encoder->unet->vae"
|
||||
_optional_components = ["safety_checker", "feature_extractor", "image_encoder"]
|
||||
_exclude_from_cpu_offload = ["safety_checker"]
|
||||
_callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -503,22 +442,15 @@ class StableDiffusionLDM3DPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -621,9 +553,9 @@ class StableDiffusionLDM3DPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
||||
@@ -643,66 +575,6 @@ class StableDiffusionLDM3DPipeline(
|
||||
latents = latents * self.scheduler.init_noise_sigma
|
||||
return latents
|
||||
|
||||
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
||||
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
|
||||
"""
|
||||
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
||||
|
||||
Args:
|
||||
timesteps (`torch.Tensor`):
|
||||
generate embedding vectors at these timesteps
|
||||
embedding_dim (`int`, *optional*, defaults to 512):
|
||||
dimension of the embeddings to generate
|
||||
dtype:
|
||||
data type of the generated embeddings
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
|
||||
"""
|
||||
assert len(w.shape) == 1
|
||||
w = w * 1000.0
|
||||
|
||||
half_dim = embedding_dim // 2
|
||||
emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
|
||||
emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
|
||||
emb = w.to(dtype)[:, None] * emb[None, :]
|
||||
emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
|
||||
if embedding_dim % 2 == 1: # zero pad
|
||||
emb = torch.nn.functional.pad(emb, (0, 1))
|
||||
assert emb.shape == (w.shape[0], embedding_dim)
|
||||
return emb
|
||||
|
||||
@property
|
||||
def guidance_scale(self):
|
||||
return self._guidance_scale
|
||||
|
||||
@property
|
||||
def guidance_rescale(self):
|
||||
return self._guidance_rescale
|
||||
|
||||
@property
|
||||
def clip_skip(self):
|
||||
return self._clip_skip
|
||||
|
||||
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
||||
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
|
||||
# corresponds to doing no classifier free guidance.
|
||||
@property
|
||||
def do_classifier_free_guidance(self):
|
||||
return self._guidance_scale > 1 and self.unet.config.time_cond_proj_dim is None
|
||||
|
||||
@property
|
||||
def cross_attention_kwargs(self):
|
||||
return self._cross_attention_kwargs
|
||||
|
||||
@property
|
||||
def num_timesteps(self):
|
||||
return self._num_timesteps
|
||||
|
||||
@property
|
||||
def interrupt(self):
|
||||
return self._interrupt
|
||||
|
||||
@torch.no_grad()
|
||||
@replace_example_docstring(EXAMPLE_DOC_STRING)
|
||||
def __call__(
|
||||
@@ -711,7 +583,6 @@ class StableDiffusionLDM3DPipeline(
|
||||
height: Optional[int] = None,
|
||||
width: Optional[int] = None,
|
||||
num_inference_steps: int = 49,
|
||||
timesteps: List[int] = None,
|
||||
guidance_scale: float = 5.0,
|
||||
negative_prompt: Optional[Union[str, List[str]]] = None,
|
||||
num_images_per_prompt: Optional[int] = 1,
|
||||
@@ -724,12 +595,10 @@ class StableDiffusionLDM3DPipeline(
|
||||
ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
|
||||
output_type: Optional[str] = "pil",
|
||||
return_dict: bool = True,
|
||||
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
||||
callback_steps: int = 1,
|
||||
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
||||
guidance_rescale: float = 0.0,
|
||||
clip_skip: Optional[int] = None,
|
||||
callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
|
||||
callback_on_step_end_tensor_inputs: List[str] = ["latents"],
|
||||
**kwargs,
|
||||
):
|
||||
r"""
|
||||
The call function to the pipeline for generation.
|
||||
@@ -780,21 +649,18 @@ class StableDiffusionLDM3DPipeline(
|
||||
return_dict (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
|
||||
plain tuple.
|
||||
callback (`Callable`, *optional*):
|
||||
A function that calls every `callback_steps` steps during inference. The function is called with the
|
||||
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
||||
callback_steps (`int`, *optional*, defaults to 1):
|
||||
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
||||
every step.
|
||||
cross_attention_kwargs (`dict`, *optional*):
|
||||
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
|
||||
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
||||
clip_skip (`int`, *optional*):
|
||||
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
|
||||
the output of the pre-final layer will be used for computing the prompt embeddings.
|
||||
callback_on_step_end (`Callable`, *optional*):
|
||||
A function that calls at the end of each denoising steps during the inference. The function is called
|
||||
with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
|
||||
callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
|
||||
`callback_on_step_end_tensor_inputs`.
|
||||
callback_on_step_end_tensor_inputs (`List`, *optional*):
|
||||
The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
|
||||
will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
|
||||
`._callback_tensor_inputs` attribute of your pipeline class.
|
||||
Examples:
|
||||
|
||||
Returns:
|
||||
@@ -804,22 +670,6 @@ class StableDiffusionLDM3DPipeline(
|
||||
second element is a list of `bool`s indicating whether the corresponding generated image contains
|
||||
"not-safe-for-work" (nsfw) content.
|
||||
"""
|
||||
callback = kwargs.pop("callback", None)
|
||||
callback_steps = kwargs.pop("callback_steps", None)
|
||||
|
||||
if callback is not None:
|
||||
deprecate(
|
||||
"callback",
|
||||
"1.0.0",
|
||||
"Passing `callback` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
|
||||
)
|
||||
if callback_steps is not None:
|
||||
deprecate(
|
||||
"callback_steps",
|
||||
"1.0.0",
|
||||
"Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
|
||||
)
|
||||
|
||||
# 0. Default height and width to unet
|
||||
height = height or self.unet.config.sample_size * self.vae_scale_factor
|
||||
width = width or self.unet.config.sample_size * self.vae_scale_factor
|
||||
@@ -835,15 +685,8 @@ class StableDiffusionLDM3DPipeline(
|
||||
negative_prompt_embeds,
|
||||
ip_adapter_image,
|
||||
ip_adapter_image_embeds,
|
||||
callback_on_step_end_tensor_inputs,
|
||||
)
|
||||
|
||||
self._guidance_scale = guidance_scale
|
||||
self._guidance_rescale = guidance_rescale
|
||||
self._clip_skip = clip_skip
|
||||
self._cross_attention_kwargs = cross_attention_kwargs
|
||||
self._interrupt = False
|
||||
|
||||
# 2. Define call parameters
|
||||
if prompt is not None and isinstance(prompt, str):
|
||||
batch_size = 1
|
||||
@@ -853,6 +696,10 @@ class StableDiffusionLDM3DPipeline(
|
||||
batch_size = prompt_embeds.shape[0]
|
||||
|
||||
device = self._execution_device
|
||||
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
||||
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
|
||||
# corresponds to doing no classifier free guidance.
|
||||
do_classifier_free_guidance = guidance_scale > 1.0
|
||||
|
||||
if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
|
||||
image_embeds = self.prepare_ip_adapter_image_embeds(
|
||||
@@ -860,7 +707,7 @@ class StableDiffusionLDM3DPipeline(
|
||||
ip_adapter_image_embeds,
|
||||
device,
|
||||
batch_size * num_images_per_prompt,
|
||||
self.do_classifier_free_guidance,
|
||||
do_classifier_free_guidance,
|
||||
)
|
||||
|
||||
# 3. Encode input prompt
|
||||
@@ -868,7 +715,7 @@ class StableDiffusionLDM3DPipeline(
|
||||
prompt,
|
||||
device,
|
||||
num_images_per_prompt,
|
||||
self.do_classifier_free_guidance,
|
||||
do_classifier_free_guidance,
|
||||
negative_prompt,
|
||||
prompt_embeds=prompt_embeds,
|
||||
negative_prompt_embeds=negative_prompt_embeds,
|
||||
@@ -877,11 +724,12 @@ class StableDiffusionLDM3DPipeline(
|
||||
# For classifier free guidance, we need to do two forward passes.
|
||||
# Here we concatenate the unconditional and text embeddings into a single batch
|
||||
# to avoid doing two forward passes
|
||||
if self.do_classifier_free_guidance:
|
||||
if do_classifier_free_guidance:
|
||||
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
|
||||
|
||||
# 4. Prepare timesteps
|
||||
timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
|
||||
self.scheduler.set_timesteps(num_inference_steps, device=device)
|
||||
timesteps = self.scheduler.timesteps
|
||||
|
||||
# 5. Prepare latent variables
|
||||
num_channels_latents = self.unet.config.in_channels
|
||||
@@ -902,24 +750,12 @@ class StableDiffusionLDM3DPipeline(
|
||||
# 6.1 Add image embeds for IP-Adapter
|
||||
added_cond_kwargs = {"image_embeds": image_embeds} if ip_adapter_image is not None else None
|
||||
|
||||
# 6.2 Optionally get Guidance Scale Embedding
|
||||
timestep_cond = None
|
||||
if self.unet.config.time_cond_proj_dim is not None:
|
||||
guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt)
|
||||
timestep_cond = self.get_guidance_scale_embedding(
|
||||
guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim
|
||||
).to(device=device, dtype=latents.dtype)
|
||||
|
||||
# 7. Denoising loop
|
||||
num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
|
||||
self._num_timesteps = len(timesteps)
|
||||
with self.progress_bar(total=num_inference_steps) as progress_bar:
|
||||
for i, t in enumerate(timesteps):
|
||||
if self.interrupt:
|
||||
continue
|
||||
|
||||
# expand the latents if we are doing classifier free guidance
|
||||
latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
|
||||
latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
|
||||
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
|
||||
|
||||
# predict the noise residual
|
||||
@@ -927,34 +763,19 @@ class StableDiffusionLDM3DPipeline(
|
||||
latent_model_input,
|
||||
t,
|
||||
encoder_hidden_states=prompt_embeds,
|
||||
timestep_cond=timestep_cond,
|
||||
cross_attention_kwargs=cross_attention_kwargs,
|
||||
added_cond_kwargs=added_cond_kwargs,
|
||||
return_dict=False,
|
||||
)[0]
|
||||
|
||||
# perform guidance
|
||||
if self.do_classifier_free_guidance:
|
||||
if do_classifier_free_guidance:
|
||||
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
||||
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
|
||||
|
||||
if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
|
||||
# Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
|
||||
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
|
||||
|
||||
# compute the previous noisy sample x_t -> x_t-1
|
||||
latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
|
||||
|
||||
if callback_on_step_end is not None:
|
||||
callback_kwargs = {}
|
||||
for k in callback_on_step_end_tensor_inputs:
|
||||
callback_kwargs[k] = locals()[k]
|
||||
callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
|
||||
|
||||
latents = callback_outputs.pop("latents", latents)
|
||||
prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
|
||||
negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
|
||||
|
||||
# call the callback, if provided
|
||||
if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
|
||||
progress_bar.update()
|
||||
|
||||
+5
-12
@@ -414,22 +414,15 @@ class StableDiffusionPanoramaPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -557,9 +550,9 @@ class StableDiffusionPanoramaPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
||||
|
||||
@@ -549,22 +549,15 @@ class StableDiffusionXLPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -678,9 +671,9 @@ class StableDiffusionXLPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
||||
|
||||
+5
-12
@@ -616,9 +616,9 @@ class StableDiffusionXLImg2ImgPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
def get_timesteps(self, num_inference_steps, strength, device, denoising_start=None):
|
||||
@@ -782,22 +782,15 @@ class StableDiffusionXLImg2ImgPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
|
||||
+5
-12
@@ -486,22 +486,15 @@ class StableDiffusionXLInpaintPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -858,9 +851,9 @@ class StableDiffusionXLInpaintPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
def prepare_latents(
|
||||
|
||||
@@ -563,22 +563,15 @@ class StableDiffusionXLAdapterPipeline(
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
@@ -693,9 +686,9 @@ class StableDiffusionXLAdapterPipeline(
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
elif ip_adapter_image_embeds[0].ndim != 3:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
||||
|
||||
@@ -69,7 +69,6 @@ from .import_utils import (
|
||||
is_note_seq_available,
|
||||
is_onnx_available,
|
||||
is_peft_available,
|
||||
is_pytest_available,
|
||||
is_scipy_available,
|
||||
is_tensorboard_available,
|
||||
is_torch_available,
|
||||
|
||||
@@ -278,13 +278,6 @@ try:
|
||||
except importlib_metadata.PackageNotFoundError:
|
||||
_peft_available = False
|
||||
|
||||
_pytest_available = importlib.util.find_spec("pytest") is not None
|
||||
try:
|
||||
_pytest_version = importlib_metadata.version("pytest")
|
||||
logger.debug(f"Successfully imported pytest version {_pytest_version}")
|
||||
except importlib_metadata.PackageNotFoundError:
|
||||
_pytest_available = False
|
||||
|
||||
_torchvision_available = importlib.util.find_spec("torchvision") is not None
|
||||
try:
|
||||
_torchvision_version = importlib_metadata.version("torchvision")
|
||||
@@ -381,10 +374,6 @@ def is_peft_available():
|
||||
return _peft_available
|
||||
|
||||
|
||||
def is_pytest_available():
|
||||
return _pytest_available
|
||||
|
||||
|
||||
def is_torchvision_available():
|
||||
return _torchvision_available
|
||||
|
||||
|
||||
@@ -779,7 +779,7 @@ class CaptureLogger:
|
||||
>>> logger = logging.get_logger("diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.py")
|
||||
>>> with CaptureLogger(logger) as cl:
|
||||
... logger.info(msg)
|
||||
>>> assert cl.out, msg + \n
|
||||
>>> assert cl.out, msg + "\n"
|
||||
```
|
||||
"""
|
||||
|
||||
|
||||
@@ -1,85 +0,0 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2024 The HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
This script is responsible for cleaning the list of doctests by making sure the entries all exist and are in
|
||||
alphabetical order.
|
||||
|
||||
Usage (from the root of the repo):
|
||||
|
||||
Check that the doctest list is properly sorted and all files exist (used in `make repo-consistency`):
|
||||
|
||||
```bash
|
||||
python utils/check_doctest_list.py
|
||||
```
|
||||
|
||||
Auto-sort the doctest list if it is not properly sorted (used in `make fix-copies`):
|
||||
|
||||
```bash
|
||||
python utils/check_doctest_list.py --fix_and_overwrite
|
||||
```
|
||||
"""
|
||||
import argparse
|
||||
import os
|
||||
|
||||
|
||||
# All paths are set with the intent you should run this script from the root of the repo with the command
|
||||
# python utils/check_doctest_list.py
|
||||
REPO_PATH = "."
|
||||
DOCTEST_FILE_PATHS = ["not_doctested.txt"]
|
||||
|
||||
|
||||
def clean_doctest_list(doctest_file: str, overwrite: bool = False):
    """
    Check (and optionally fix) the list of paths stored in a doctest file.

    Each non-blank line is expected to start with a path relative to `REPO_PATH`; anything after the first space on
    a line is ignored. Blank lines are skipped and never count as entries.

    Args:
        doctest_file (`str`):
            The path to the doctest file to check or clean.
        overwrite (`bool`, *optional*, defaults to `False`):
            Whether or not to fix problems. If `False`, will error when the file is not clean.

    Raises:
        `ValueError`: If an entry is neither an existing file nor an existing directory, or if the entries are not
        in alphabetical order and `overwrite` is `False`.
    """
    non_existent_paths = []
    all_paths = []
    with open(doctest_file, "r", encoding="utf-8") as f:
        for line in f:
            entry = line.strip().split(" ")[0]
            if not entry:
                # A blank line would otherwise resolve to the repo directory itself, pass the existence check and
                # then either break the ordering check or be written back as an empty first line.
                continue
            path = os.path.join(REPO_PATH, entry)
            if not (os.path.isfile(path) or os.path.isdir(path)):
                non_existent_paths.append(entry)
            all_paths.append(entry)

    if len(non_existent_paths) > 0:
        non_existent_paths = "\n".join([f"- {f}" for f in non_existent_paths])
        raise ValueError(f"`{doctest_file}` contains non-existent paths:\n{non_existent_paths}")

    sorted_paths = sorted(all_paths)
    if all_paths != sorted_paths:
        if not overwrite:
            raise ValueError(
                f"Files in `{doctest_file}` are not in alphabetical order, run `make fix-copies` to fix "
                "this automatically."
            )
        # Only the path token of each line is written back, one entry per line.
        with open(doctest_file, "w", encoding="utf-8") as f:
            f.write("\n".join(sorted_paths) + "\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: check every list named in DOCTEST_FILE_PATHS (looked up under `utils/`), and
    # rewrite it in sorted order when `--fix_and_overwrite` is passed.
    cli = argparse.ArgumentParser()
    cli.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
    options = cli.parse_args()

    for file_name in DOCTEST_FILE_PATHS:
        clean_doctest_list(os.path.join(REPO_PATH, "utils", file_name), options.fix_and_overwrite)
|
||||
@@ -1,100 +0,0 @@
|
||||
docs/source/en/training/create_dataset.md
|
||||
docs/source/en/training/wuerstchen.md
|
||||
docs/source/en/training/adapt_a_model.md
|
||||
docs/source/en/training/text2image.md
|
||||
docs/source/en/training/custom_diffusion.md
|
||||
docs/source/en/training/sdxl.md
|
||||
docs/source/en/training/unconditional_training.md
|
||||
docs/source/en/training/overview.md
|
||||
docs/source/en/training/t2i_adapters.md
|
||||
docs/source/en/training/lcm_distill.md
|
||||
docs/source/en/training/instructpix2pix.md
|
||||
docs/source/en/training/kandinsky.md
|
||||
docs/source/en/training/lora.md
|
||||
docs/source/en/training/controlnet.md
|
||||
docs/source/en/training/dreambooth.md
|
||||
docs/source/en/training/ddpo.md
|
||||
docs/source/en/training/text_inversion.md
|
||||
docs/source/en/training/distributed_inference.md
|
||||
docs/source/en/optimization/torch2.0.md
|
||||
docs/source/en/optimization/coreml.md
|
||||
docs/source/en/optimization/tome.md
|
||||
docs/source/en/optimization/xformers.md
|
||||
docs/source/en/optimization/deepcache.md
|
||||
docs/source/en/optimization/fp16.md
|
||||
docs/source/en/optimization/memory.md
|
||||
docs/source/en/optimization/habana.md
|
||||
docs/source/en/optimization/open_vino.md
|
||||
docs/source/en/optimization/mps.md
|
||||
docs/source/en/optimization/opt_overview.md
|
||||
docs/source/en/optimization/onnx.md
|
||||
docs/source/en/tutorials/basic_training.md
|
||||
docs/source/ko/index.md
|
||||
docs/source/ko/quicktour.md
|
||||
docs/source/ko/in_translation.md
|
||||
docs/source/ko/installation.md
|
||||
docs/source/ko/stable_diffusion.md
|
||||
docs/source/ko/training/create_dataset.md
|
||||
docs/source/ko/training/wuerstchen.md
|
||||
docs/source/ko/training/adapt_a_model.md
|
||||
docs/source/ko/training/text2image.md
|
||||
docs/source/ko/training/custom_diffusion.md
|
||||
docs/source/ko/training/sdxl.md
|
||||
docs/source/ko/training/unconditional_training.md
|
||||
docs/source/ko/training/overview.md
|
||||
docs/source/ko/training/t2i_adapters.md
|
||||
docs/source/ko/training/lcm_distill.md
|
||||
docs/source/ko/training/instructpix2pix.md
|
||||
docs/source/ko/training/kandinsky.md
|
||||
docs/source/ko/training/lora.md
|
||||
docs/source/ko/training/controlnet.md
|
||||
docs/source/ko/training/dreambooth.md
|
||||
docs/source/ko/training/ddpo.md
|
||||
docs/source/ko/training/text_inversion.md
|
||||
docs/source/ko/training/distributed_inference.md
|
||||
docs/source/ko/optimization/torch2.0.md
|
||||
docs/source/ko/optimization/coreml.md
|
||||
docs/source/ko/optimization/tome.md
|
||||
docs/source/ko/optimization/xformers.md
|
||||
docs/source/ko/optimization/deepcache.md
|
||||
docs/source/ko/optimization/fp16.md
|
||||
docs/source/ko/optimization/memory.md
|
||||
docs/source/ko/optimization/habana.md
|
||||
docs/source/ko/optimization/open_vino.md
|
||||
docs/source/ko/optimization/mps.md
|
||||
docs/source/ko/optimization/opt_overview.md
|
||||
docs/source/ko/optimization/onnx.md
|
||||
docs/source/ko/api/pipelines/stable_diffusion/stable_diffusion_xl.md
|
||||
docs/source/ko/tutorials/basic_training.md
|
||||
docs/source/ko/using-diffusers/loading.md
|
||||
docs/source/ko/using-diffusers/unconditional_image_generation.md
|
||||
docs/source/ko/using-diffusers/depth2img.md
|
||||
docs/source/ko/using-diffusers/control_brightness.md
|
||||
docs/source/ko/using-diffusers/contribute_pipeline.md
|
||||
docs/source/ko/using-diffusers/img2img.md
|
||||
docs/source/ko/using-diffusers/weighted_prompts.md
|
||||
docs/source/ko/using-diffusers/schedulers.md
|
||||
docs/source/ko/using-diffusers/custom_pipeline_examples.md
|
||||
docs/source/ko/using-diffusers/using_safetensors.md
|
||||
docs/source/ko/using-diffusers/reproducibility.md
|
||||
docs/source/ko/using-diffusers/inpaint.md
|
||||
docs/source/ko/using-diffusers/conditional_image_generation.md
|
||||
docs/source/ko/using-diffusers/controlling_generation.md
|
||||
docs/source/ko/using-diffusers/reusing_seeds.md
|
||||
docs/source/ko/using-diffusers/textual_inversion_inference.md
|
||||
docs/source/ko/using-diffusers/loading_overview.md
|
||||
docs/source/ko/using-diffusers/custom_pipeline_overview.md
|
||||
docs/source/ko/using-diffusers/other-formats.md
|
||||
docs/source/ko/using-diffusers/stable_diffusion_jax_how_to.md
|
||||
docs/source/ko/using-diffusers/pipeline_overview.md
|
||||
docs/source/ko/using-diffusers/write_own_pipeline.md
|
||||
docs/source/pt/index.md
|
||||
docs/source/pt/quicktour.md
|
||||
docs/source/pt/in_translation.md
|
||||
docs/source/pt/installation.md
|
||||
docs/source/pt/stable_diffusion.md
|
||||
docs/source/ja/index.md
|
||||
docs/source/ja/quicktour.md
|
||||
docs/source/ja/in_translation.md
|
||||
docs/source/ja/installation.md
|
||||
docs/source/ja/stable_diffusion.md
|
||||
@@ -1,401 +0,0 @@
|
||||
# Copyright 2024 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import collections
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from fnmatch import fnmatch
|
||||
from typing import Dict, List
|
||||
|
||||
import requests
|
||||
from slack_sdk import WebClient
|
||||
|
||||
|
||||
client = WebClient(token=os.environ["CI_SLACK_BOT_TOKEN"])
|
||||
|
||||
|
||||
def handle_test_results(test_results):
    """
    Parse a pytest summary line into failure/success counts and the reported duration.

    Args:
        test_results (`str`):
            The summary line, for instance `"== 2 failed, 3 passed in 0:00:12 =="`.

    Returns:
        `tuple`: `(failed, success, time_spent)` where `failed` and `success` are `int` counts and `time_spent` is
        the raw duration token taken from the end of the line.

    Raises:
        `ValueError`: If the token preceding a `failed`/`passed` label is not an integer.
    """
    expressions = test_results.split(" ")

    # When the output is short enough, the output is surrounded by = signs: "== OUTPUT =="
    # When it is too long, those signs are not present.
    time_spent = expressions[-2] if "=" in expressions[-1] else expressions[-1]

    failed = 0
    success = 0

    # The count always comes right before its outcome label, so walk the tokens pairwise. This also avoids
    # wrapping around to the last token when a label happens to be the very first token.
    for count, outcome in zip(expressions, expressions[1:]):
        # Compare the label exactly (ignoring the trailing comma): a substring test would also count
        # `xfailed` as failures and `xpassed` as successes.
        label = outcome.strip(",")
        if label == "failed":
            failed += int(count)
        elif label == "passed":
            success += int(count)

    return failed, success, time_spent
|
||||
|
||||
|
||||
def extract_first_line_failure(failures_short_lines):
    """
    Map each failing doctest file to the first non-numbered line of its failure section.

    A section starts on a line matching `_ [doctest]`; the file name is the third space-separated token of that
    line. Lines whose first token is all digits are skipped, and the first other line is stored as the failure
    for that file. A later section for the same file overwrites the earlier entry.

    Args:
        failures_short_lines (`str`):
            The short failure report, with lines separated by `"\\n"`.

    Returns:
        `dict`: File name -> first failure line.
    """
    first_lines = {}
    # File whose failure line we are still looking for; `None` when not inside a failure section.
    pending_file = None

    for row in failures_short_lines.split("\n"):
        if re.search(r"_ \[doctest\]", row):
            pending_file = row.split(" ")[2]
            continue

        if pending_file is None:
            continue

        leading_token = row.split(" ")[0]
        if not leading_token.isdigit():
            first_lines[pending_file] = row
            pending_file = None

    return first_lines
|
||||
|
||||
|
||||
class Message:
|
||||
def __init__(self, title: str, doc_test_results: Dict):
    """
    Store the report title and the aggregated doc test results.

    Args:
        title (`str`):
            Text shown in the header block of the message.
        doc_test_results (`Dict`):
            Results dictionary; the keys `"time_spent"`, `"success"` and `"failures"` are read here and the whole
            dictionary is kept on the instance.
    """
    # Only the part of the recorded duration before the first comma is kept.
    elapsed = doc_test_results["time_spent"].split(",")[0]
    passed = doc_test_results["success"]
    failed = doc_test_results["failures"]

    self.title = title
    self._time_spent = elapsed
    self.n_success = passed
    self.n_failures = failed
    self.n_tests = passed + failed

    # Full results dictionary, kept for the per-category failure report.
    self.doc_test_results = doc_test_results
|
||||
|
||||
@property
def time(self) -> str:
    """
    Format the recorded duration as `"{hours}h{minutes}m{seconds}s"`, truncating each part to an integer.
    """
    fields = self._time_spent.split(":")

    # Time can be formatted as xx:xx:xx, as .xx, or as x.xx if the time spent was less than a minute.
    if len(fields) == 1:
        fields = [0, 0, fields[0]]

    elapsed = int(fields[0]) * 3600 + int(fields[1]) * 60 + float(fields[2])

    whole_hours = elapsed // 3600
    whole_minutes = (elapsed % 3600) // 60
    leftover_seconds = elapsed % 60
    return f"{int(whole_hours)}h{int(whole_minutes)}m{int(leftover_seconds)}s"
|
||||
|
||||
@property
def header(self) -> Dict:
    """Return the header block dictionary carrying the report title as plain text."""
    title_text = {"type": "plain_text", "text": self.title}
    return {"type": "header", "text": title_text}
|
||||
|
||||
@property
def no_failures(self) -> Dict:
    """
    Return the section block used when every test passed.

    The block states the number of tests and the suite duration, and carries a button linking to the GitHub
    Actions run identified by the `GITHUB_RUN_ID` environment variable.

    Raises:
        `KeyError`: If `GITHUB_RUN_ID` is not set in the environment.
    """
    return {
        "type": "section",
        "text": {
            "type": "plain_text",
            "text": f"🌞 There were no failures: all {self.n_tests} tests passed. The suite ran in {self.time}.",
            "emoji": True,
        },
        "accessory": {
            "type": "button",
            "text": {"type": "plain_text", "text": "Check Action results", "emoji": True},
            # The run ID belongs to the diffusers workflow, so the link has to target the diffusers repository;
            # pointing at another repository produces a dead link.
            "url": f"https://github.com/huggingface/diffusers/actions/runs/{os.environ['GITHUB_RUN_ID']}",
        },
    }
|
||||
|
||||
@property
def failures(self) -> Dict:
    """
    Return the section block summarizing a run that had failures.

    The block states the number of failures, the number of tests and the suite duration, and carries a button
    linking to the GitHub Actions run identified by the `GITHUB_RUN_ID` environment variable.

    Raises:
        `KeyError`: If `GITHUB_RUN_ID` is not set in the environment.
    """
    return {
        "type": "section",
        "text": {
            "type": "plain_text",
            "text": (
                f"There were {self.n_failures} failures, out of {self.n_tests} tests.\nThe suite ran in"
                f" {self.time}."
            ),
            "emoji": True,
        },
        "accessory": {
            "type": "button",
            "text": {"type": "plain_text", "text": "Check Action results", "emoji": True},
            # The run ID belongs to the diffusers workflow, so the link has to target the diffusers repository;
            # pointing at another repository produces a dead link.
            "url": f"https://github.com/huggingface/diffusers/actions/runs/{os.environ['GITHUB_RUN_ID']}",
        },
    }
|
||||
|
||||
@property
def category_failures(self) -> List[Dict]:
    """
    Build one `mrkdwn` section block per category that has failures.

    Every dictionary-valued entry of `self.doc_test_results` is treated as a category whose `"failed"` item lists
    the failing examples. Categories without failures produce no block. The listing of a category is cut off
    with `[Truncated]` so that the block text stays within 3000 characters.

    Returns:
        `List[Dict]`: The section blocks, in the iteration order of `self.doc_test_results`.
    """
    MAX_ERROR_TEXT = 3000 - len("The following examples had failures:\n\n\n\n") - len("[Truncated]\n")
    line_length = 40

    # Use the results stored on this instance: relying on a module-level `doc_test_results` variable only works
    # when the script is run as `__main__` and that global happens to exist.
    category_failures = {k: v["failed"] for k, v in self.doc_test_results.items() if isinstance(v, dict)}

    def single_category_failures(category, failures):
        # Render the failures of one category, or "" when there are none.
        if len(failures) == 0:
            return ""

        text = f"*{category} failures*:".ljust(line_length // 2) + "\n"
        for failure in failures:
            new_text = text + f"`{failure}`\n"
            if len(new_text) > MAX_ERROR_TEXT:
                text = text + "[Truncated]\n"
                break
            text = new_text

        return text

    failure_blocks = []
    for category, failures in category_failures.items():
        report = single_category_failures(category, failures)
        if len(report) == 0:
            continue
        failure_blocks.append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"The following examples had failures:\n\n\n{report}\n",
                },
            }
        )

    return failure_blocks
|
||||
|
||||
@property
def payload(self) -> str:
    """Serialise the full Slack message to a JSON string.

    The message always starts with ``self.header``. When there are failures it
    continues with the failure summary and the per-category blocks; when there
    are exactly zero failures it continues with ``self.no_failures``.

    Returns:
        The list of blocks encoded with ``json.dumps``.
    """
    message_blocks = [self.header]

    if self.n_failures > 0:
        message_blocks.append(self.failures)
        message_blocks.extend(self.category_failures)
    elif self.n_failures == 0:
        message_blocks.append(self.no_failures)

    return json.dumps(message_blocks)
|
||||
|
||||
@staticmethod
def error_out():
    """Post a generic "tests could not be run" message to the daily CI channel.

    The payload is printed as JSON and then sent through the module-level
    Slack ``client``.

    Raises:
        KeyError: if ``GITHUB_RUN_ID`` or ``CI_SLACK_CHANNEL_ID_DAILY`` is not
            set in the environment.
    """
    payload = [
        {
            "type": "section",
            "text": {
                "type": "plain_text",
                "text": "There was an issue running the tests.",
            },
            "accessory": {
                "type": "button",
                "text": {"type": "plain_text", "text": "Check Action results", "emoji": True},
                "url": f"https://github.com/huggingface/transformers/actions/runs/{os.environ['GITHUB_RUN_ID']}",
            },
        }
    ]

    print("Sending the following payload")
    # `payload` is already a list of blocks, not a JSON string: passing it to
    # `json.loads` raised TypeError and the error message was never posted.
    print(json.dumps({"blocks": payload}))

    client.chat_postMessage(
        channel=os.environ["CI_SLACK_CHANNEL_ID_DAILY"],
        text="There was an issue running the tests.",
        blocks=payload,
    )
|
||||
|
||||
def post(self):
    """Send the main report message and remember the Slack response.

    The response of ``client.chat_postMessage`` is stored in ``self.thread_ts``
    so that ``post_reply`` can attach its replies to this message.
    """
    print("Sending the following payload")
    serialized_blocks = self.payload
    print(json.dumps({"blocks": json.loads(serialized_blocks)}))

    # Plain-text fallback sent alongside the blocks.
    if self.n_failures:
        text = f"{self.n_failures} failures out of {self.n_tests} tests,"
    else:
        text = "All tests passed."

    self.thread_ts = client.chat_postMessage(
        channel=os.environ["CI_SLACK_CHANNEL_ID_DAILY"],
        blocks=serialized_blocks,
        text=text,
    )
|
||||
|
||||
def get_reply_blocks(self, job_name, job_link, failures, text):
    """Build the Slack blocks for the threaded reply of a single job.

    Args:
        job_name: Title of the reply; shown upper-cased in a header block.
        job_link: URL for the "GitHub Action job" button, or ``None`` to omit
            the button.
        failures: Mapping of test name to error message.
        text: Markdown text of the summary section.

    Returns:
        A list of three blocks: header, summary section (with optional button)
        and a section listing the failures.
    """
    # `text` must be less than 3001 characters in Slack SDK, so keep some room
    # for the truncation marker.
    truncation_marker = "[Truncated]"
    budget = 3000 - len(truncation_marker)

    details = ""
    for test_name, error in failures.items():
        candidate = f"{details}*{test_name}*\n_{error}_\n\n"
        if len(candidate) > budget:
            # `details` is within budget here, so the marker still fits.
            details += truncation_marker
            break
        details = candidate

    summary_section = {"type": "section", "text": {"type": "mrkdwn", "text": text}}
    if job_link is not None:
        summary_section["accessory"] = {
            "type": "button",
            "text": {"type": "plain_text", "text": "GitHub Action job", "emoji": True},
            "url": job_link,
        }

    header_block = {"type": "header", "text": {"type": "plain_text", "text": job_name.upper(), "emoji": True}}
    details_section = {"type": "section", "text": {"type": "mrkdwn", "text": details}}

    return [header_block, summary_section, details_section]
|
||||
|
||||
def post_reply(self):
    """Post one threaded reply per category that has failures.

    Must be called after ``post``. Note that the summary keys (``job_link``,
    ``failures``, ``success``, ``time_spent``) are removed from
    ``self.doc_test_results`` as a side effect.

    Raises:
        ValueError: if no main message has been posted yet.
    """
    if self.thread_ts is None:
        raise ValueError("Can only post reply if a post has been made.")

    results = self.doc_test_results
    job_link = results.pop("job_link")
    # Drop the summary entries so only per-category dicts remain.
    for summary_key in ("failures", "success", "time_spent"):
        results.pop(summary_key)

    for job in sorted(results):
        job_result = results[job]
        job_failures = job_result["failures"]
        if not len(job_failures):
            continue

        text = f"*Num failures* :{len(job_result['failed'])} \n"
        blocks = self.get_reply_blocks(job, job_link, job_failures, text=text)

        print("Sending the following reply")
        print(json.dumps({"blocks": blocks}))

        client.chat_postMessage(
            channel=os.environ["CI_SLACK_CHANNEL_ID_DAILY"],
            text=f"Results for {job}",
            blocks=blocks,
            thread_ts=self.thread_ts["ts"],
        )

        # Pause between consecutive replies.
        time.sleep(1)
|
||||
|
||||
|
||||
def get_job_links():
    """Fetch the name -> HTML URL mapping of the jobs of the current workflow run.

    The GitHub API is queried 100 jobs per page; further pages are requested
    according to the ``total_count`` reported by the first page.

    Returns:
        A dict mapping job names to their ``html_url``. This is best-effort: if
        any request or response parsing fails, the error is printed and an
        empty dict is returned.

    Raises:
        KeyError: if the ``GITHUB_RUN_ID`` environment variable is not set.
    """
    run_id = os.environ["GITHUB_RUN_ID"]
    url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{run_id}/jobs?per_page=100"
    # Without a timeout an unresponsive API would hang the CI job.
    request_timeout = 30
    jobs = {}

    try:
        # The first request is inside the `try` as well, so that a network or
        # decoding error degrades to "no links" like the later pages do.
        result = requests.get(url, timeout=request_timeout).json()
        jobs.update({job["name"]: job["html_url"] for job in result["jobs"]})
        pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100)

        for i in range(pages_to_iterate_over):
            # Page 1 was fetched above; remaining pages start at 2.
            result = requests.get(url + f"&page={i + 2}", timeout=request_timeout).json()
            jobs.update({job["name"]: job["html_url"] for job in result["jobs"]})

        return jobs
    except Exception as e:
        print("Unknown error, could not fetch links.", e)

    return {}
|
||||
|
||||
|
||||
def retrieve_artifact(name: str):
    """Read every file of the artifact directory ``name`` into a dict.

    Args:
        name: Path of the artifact directory.

    Returns:
        A dict mapping each file name (the part before the first ``.``) to the
        file's UTF-8 decoded content. Empty if ``name`` does not exist.

    Raises:
        ValueError: if a file cannot be decoded as UTF-8.
    """
    contents = {}
    if not os.path.exists(name):
        return contents

    for entry in os.listdir(name):
        full_path = os.path.join(name, entry)
        key = entry.split(".")[0]
        try:
            with open(full_path, encoding="utf-8") as handle:
                contents[key] = handle.read()
        except UnicodeDecodeError as e:
            raise ValueError(f"Could not open {full_path}.") from e

    return contents
|
||||
|
||||
|
||||
def retrieve_available_artifacts():
    """List the directories of the current working directory as artifacts.

    Returns:
        A dict mapping each directory name to an ``Artifact`` whose ``paths``
        list holds one ``{"name": ..., "path": ...}`` entry for that directory.
    """

    class Artifact:
        """A named artifact together with the paths it was found at."""

        def __init__(self, name: str):
            self.name = name
            self.paths = []

        def __str__(self):
            return self.name

        def add_path(self, path: str):
            self.paths.append({"name": self.name, "path": path})

    found: Dict[str, Artifact] = {}

    for entry in os.listdir():
        if not os.path.isdir(entry):
            continue

        artifact = found.get(entry)
        if artifact is None:
            artifact = found[entry] = Artifact(entry)
        artifact.add_path(entry)

    return found
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: collect the doc-test report artifact, sort the failed tests
    # into categories and post the result (plus threaded details) to Slack.
    github_actions_job_links = get_job_links()
    available_artifacts = retrieve_available_artifacts()

    # Maps a file-name glob to the category its failures are reported under.
    docs = collections.OrderedDict(
        [
            ("*.py", "API Examples"),
            ("*.md", "MD Examples"),
        ]
    )

    # This dict will contain all the information relative to each doc test category:
    # - failed: list of failed tests
    # - failures: dict in the format 'test': 'error_message'
    doc_test_results = {
        v: {
            "failed": [],
            "failures": {},
        }
        for v in docs.values()
    }

    # Link to the GitHub Action job
    doc_test_results["job_link"] = github_actions_job_links.get("run_doctests")

    artifact_path = available_artifacts["doc_tests_gpu_test_reports"].paths[0]
    artifact = retrieve_artifact(artifact_path["name"])
    if "stats" in artifact:
        failed, success, time_spent = handle_test_results(artifact["stats"])
        doc_test_results["failures"] = failed
        doc_test_results["success"] = success
        doc_test_results["time_spent"] = time_spent[1:-1] + ", "

        all_failures = extract_first_line_failure(artifact["failures_short"])
        for line in artifact["summary_short"].split("\n"):
            if "FAILED" not in line:
                continue

            tokens = line.replace("FAILED ", "").split()
            if not tokens:
                # Nothing left after the marker (e.g. a bare "FAILED " line).
                continue
            line = tokens[0]

            # Split on the first "::" only, so ids that contain several
            # separators do not break the two-name unpacking.
            if "::" in line:
                file_path, test = line.split("::", 1)
            else:
                file_path, test = line, line

            # The first matching pattern decides the category.
            for file_regex, category in docs.items():
                if fnmatch(file_path, file_regex):
                    doc_test_results[category]["failed"].append(test)
                    doc_test_results[category]["failures"][test] = all_failures.get(test, "N/A")
                    break

    message = Message("🤗 Results of the doc tests.", doc_test_results)
    message.post()
    message.post_reply()
|
||||
+2
-2
@@ -39,7 +39,7 @@ def main():
|
||||
open_issues = repo.get_issues(state="open")
|
||||
|
||||
for issue in open_issues:
|
||||
labels = [label.name.lower() for label in issue.get_labels()]
|
||||
labels = [label.name for label in issue.get_labels()]
|
||||
if "stale" in labels:
|
||||
comments = sorted(issue.get_comments(), key=lambda i: i.created_at, reverse=True)
|
||||
last_comment = comments[0] if len(comments) > 0 else None
|
||||
@@ -50,7 +50,7 @@ def main():
|
||||
elif (
|
||||
(dt.now(timezone.utc) - issue.updated_at).days > 23
|
||||
and (dt.now(timezone.utc) - issue.created_at).days >= 30
|
||||
and not any(label in LABELS_TO_EXEMPT for label in labels)
|
||||
and not any(label.name.lower() in LABELS_TO_EXEMPT for label in labels)
|
||||
):
|
||||
# Post a Stalebot notification after 23 days of inactivity.
|
||||
issue.create_comment(
|
||||
|
||||
Reference in New Issue
Block a user