Compare commits

..

1 Commits

Author SHA1 Message Date
Dhruv Nair 74ce30c11f update 2024-03-01 04:49:56 +00:00
49 changed files with 179 additions and 1835 deletions
-1
View File
@@ -1,7 +1,6 @@
name: Benchmarking tests
on:
workflow_dispatch:
schedule:
- cron: "30 1 1,15 * *" # every 2 weeks on the 1st and the 15th of every month at 1:30 AM
-28
View File
@@ -1,28 +0,0 @@
name: Check for Broken Links
on:
repository_dispatch:
workflow_dispatch:
schedule:
- cron: "0 0 * * *"
jobs:
check_for_broken_links:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Link Checker
id: lychee
uses: lycheeverse/lychee-action@v1
with:
args: './**/*.md'
fail: true
- name: Create Issue From File
if: env.lychee_exit_code != 0
uses: diffusers/create-issue-from-file@v4
with:
title: Link Checker Report
content-filepath: ./lychee/out.md
labels: report, automated issue
-80
View File
@@ -1,80 +0,0 @@
name: Doctests
on:
push:
branches:
- doctest*
repository_dispatch:
schedule:
- cron: "0 0 * * *"
env:
HF_HOME: /mnt/cache
RUN_SLOW: yes
OMP_NUM_THREADS: 16
MKL_NUM_THREADS: 16
jobs:
run_doctests:
runs-on: [single-gpu, nvidia-gpu, a10, ci]
container:
image: huggingface/diffusers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: NVIDIA-SMI
uses: actions/checkout@v3
run: |
nvidia-smi
- name: Install dependencies
run: python3 -m pip install -e .[quality,test,training]
- name: Environment
run: |
python3 utils/print_env.py
- name: Get doctest files
run: |
$(python3 -c 'from utils.tests_fetcher import get_all_doctest_files; to_test = get_all_doctest_files(); to_test = " ".join(to_test); fp = open("doc_tests.txt", "w"); fp.write(to_test); fp.close()')
- name: Run doctests
env:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
run: |
python3 -m pytest -v --make-reports doc_tests_gpu --doctest-modules $(cat doc_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.md"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat reports/doc_tests_gpu/failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: doc_tests_gpu_test_reports
path: reports/doc_tests_gpu
send_results:
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
needs: [run_doctests]
steps:
- uses: actions/checkout@v3
- uses: actions/download-artifact@v3
- name: Send message to Slack
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_DOCS }}
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_DOCS }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
run: |
pip install slack_sdk
python utils/notification_service_doc_tests.py
-2
View File
@@ -36,7 +36,6 @@ repo-consistency:
python utils/check_dummies.py
python utils/check_repo.py
python utils/check_inits.py
python utils/check_doctest_list.py
# this target runs checks on all files
@@ -68,7 +67,6 @@ fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
fix-copies:
python utils/check_copies.py --fix_and_overwrite
python utils/check_dummies.py --fix_and_overwrite
python utils/check_doctest_list.py --fix_and_overwrite
# Run tests for the library
-30
View File
@@ -141,7 +141,6 @@ class LCMLoRATextToImageBenchmark(TextToImageBenchmark):
super().__init__(args)
self.pipe.load_lora_weights(self.lora_id)
self.pipe.fuse_lora()
self.pipe.unload_lora_weights()
self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
def get_result_filepath(self, args):
@@ -236,35 +235,6 @@ class InpaintingBenchmark(ImageToImageBenchmark):
)
class IPAdapterTextToImageBenchmark(TextToImageBenchmark):
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/load_neg_embed.png"
image = load_image(url)
def __init__(self, args):
pipe = self.pipeline_class.from_pretrained(args.ckpt, torch_dtype=torch.float16).to("cuda")
pipe.load_ip_adapter(
args.ip_adapter_id[0],
subfolder="models" if "sdxl" not in args.ip_adapter_id[1] else "sdxl_models",
weight_name=args.ip_adapter_id[1],
)
if args.run_compile:
pipe.unet.to(memory_format=torch.channels_last)
print("Run torch compile")
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
pipe.set_progress_bar_config(disable=True)
self.pipe = pipe
def run_inference(self, pipe, args):
_ = pipe(
prompt=PROMPT,
ip_adapter_image=self.image,
num_inference_steps=args.num_inference_steps,
num_images_per_prompt=args.batch_size,
)
class ControlNetBenchmark(TextToImageBenchmark):
pipeline_class = StableDiffusionControlNetPipeline
aux_network_class = ControlNetModel
-32
View File
@@ -1,32 +0,0 @@
import argparse
import sys
sys.path.append(".")
from base_classes import IPAdapterTextToImageBenchmark # noqa: E402
IP_ADAPTER_CKPTS = {
"runwayml/stable-diffusion-v1-5": ("h94/IP-Adapter", "ip-adapter_sd15.bin"),
"stabilityai/stable-diffusion-xl-base-1.0": ("h94/IP-Adapter", "ip-adapter_sdxl.bin"),
}
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--ckpt",
type=str,
default="runwayml/stable-diffusion-v1-5",
choices=list(IP_ADAPTER_CKPTS.keys()),
)
parser.add_argument("--batch_size", type=int, default=1)
parser.add_argument("--num_inference_steps", type=int, default=50)
parser.add_argument("--model_cpu_offload", action="store_true")
parser.add_argument("--run_compile", action="store_true")
args = parser.parse_args()
args.ip_adapter_id = IP_ADAPTER_CKPTS[args.ckpt]
benchmark_pipe = IPAdapterTextToImageBenchmark(args)
args.ckpt = f"{args.ckpt} (IP-Adapter)"
benchmark_pipe.benchmark(args)
+1 -1
View File
@@ -72,7 +72,7 @@ def main():
command += " --run_compile"
run_command(command.split())
elif file in ["benchmark_sd_inpainting.py", "benchmark_ip_adapters.py"]:
elif file == "benchmark_sd_inpainting.py":
sdxl_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"
command = f"python {file} --ckpt {sdxl_ckpt}"
run_command(command.split())
+4 -4
View File
@@ -1,4 +1,4 @@
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
LABEL maintainer="Hugging Face"
LABEL repository="diffusers"
@@ -24,9 +24,9 @@ ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
python3 -m uv pip install --no-cache-dir \
torch \
torchvision \
torchaudio \
torch==2.1.2 \
torchvision==0.16.2 \
torchaudio==2.1.2 \
"onnxruntime-gpu>=1.13.1" \
--extra-index-url https://download.pytorch.org/whl/cu117 && \
python3 -m uv pip install --no-cache-dir \
@@ -169,7 +169,7 @@ list_adapters_component_wise
If you want to compile your model with `torch.compile` make sure to first fuse the LoRA weights into the base model and unload them.
```diff
```py
pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
@@ -178,16 +178,12 @@ pipe.set_adapters(["pixel", "toy"], adapter_weights=[0.5, 1.0])
pipe.fuse_lora()
pipe.unload_lora_weights()
+ pipe.unet.to(memory_format=torch.channels_last)
+ pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
pipe = torch.compile(pipe)
prompt = "toy_face of a hacker with a hoodie, pixel art"
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
```
> [!TIP]
> You can refer to the `torch.compile()` section [here](https://huggingface.co/docs/diffusers/main/en/optimization/torch2.0#torchcompile) and [here](https://huggingface.co/docs/diffusers/main/en/tutorials/fast_diffusion#torchcompile) for more elaborate examples.
## Fusing adapters into the model
You can use PEFT to easily fuse/unfuse multiple adapters directly into the model weights (both UNet and text encoder) using the [`~diffusers.loaders.LoraLoaderMixin.fuse_lora`] method, which can lead to a speed-up in inference and lower VRAM usage.
+2 -27
View File
@@ -80,7 +80,8 @@ To do so, just specify `--train_text_encoder_ti` while launching training (for r
Please keep the following points in mind:
* SDXL has two text encoders. So, we fine-tune both using LoRA.
* When not fine-tuning the text encoders, we ALWAYS precompute the text embeddings to save memory.
* When not fine-tuning the text encoders, we ALWAYS precompute the text embeddings to save memoםהקרry.
### 3D icon example
@@ -233,32 +234,6 @@ In ComfyUI we will load a LoRA and a textual embedding at the same time.
SDXL's VAE is known to suffer from numerical instability issues. This is why we also expose a CLI argument namely `--pretrained_vae_model_name_or_path` that lets you specify the location of a better VAE (such as [this one](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix)).
### DoRA training
The advanced script now supports DoRA training too!
> Proposed in [DoRA: Weight-Decomposed Low-Rank Adaptation](https://arxiv.org/abs/2402.09353),
**DoRA** is very similar to LoRA, except it decomposes the pre-trained weight into two components, **magnitude** and **direction** and employs LoRA for _directional_ updates to efficiently minimize the number of trainable parameters.
The authors found that by using DoRA, both the learning capacity and training stability of LoRA are enhanced without any additional overhead during inference.
> [!NOTE]
> 💡DoRA training is still _experimental_
> and is likely to require different hyperparameter values to perform best compared to a LoRA.
> Specifically, we've noticed 2 differences to take into account your training:
> 1. **LoRA seem to converge faster than DoRA** (so a set of parameters that may lead to overfitting when training a LoRA may be working well for a DoRA)
> 2. **DoRA quality superior to LoRA especially in lower ranks** the difference in quality of DoRA of rank 8 and LoRA of rank 8 appears to be more significant than when training ranks of 32 or 64 for example.
> This is also aligned with some of the quantitative analysis shown in the paper.
**Usage**
1. To use DoRA you need to install `peft` from main:
```bash
pip install git+https://github.com/huggingface/peft.git
```
2. Enable DoRA training by adding this flag
```bash
--use_dora
```
**Inference**
The inference is the same as if you train a regular LoRA 🤗
### Tips and Tricks
Check out [these recommended practices](https://huggingface.co/blog/sdxl_lora_advanced_script#additional-good-practices)
@@ -651,16 +651,6 @@ def parse_args(input_args=None):
default=4,
help=("The dimension of the LoRA update matrices."),
)
parser.add_argument(
"--use_dora",
type=bool,
action="store_true",
default=False,
help=(
"Wether to train a DoRA as proposed in- DoRA: Weight-Decomposed Low-Rank Adaptation https://arxiv.org/abs/2402.09353. "
"Note: to use DoRA you need to install peft from main, `pip install git+https://github.com/huggingface/peft.git`"
),
)
parser.add_argument(
"--cache_latents",
action="store_true",
@@ -1229,7 +1219,6 @@ def main(args):
unet_lora_config = LoraConfig(
r=args.rank,
lora_alpha=args.rank,
use_dora=args.use_dora,
init_lora_weights="gaussian",
target_modules=["to_k", "to_q", "to_v", "to_out.0"],
)
@@ -1241,7 +1230,6 @@ def main(args):
text_lora_config = LoraConfig(
r=args.rank,
lora_alpha=args.rank,
use_dora=args.use_dora,
init_lora_weights="gaussian",
target_modules=["q_proj", "k_proj", "v_proj", "out_proj"],
)
@@ -661,16 +661,6 @@ def parse_args(input_args=None):
default=4,
help=("The dimension of the LoRA update matrices."),
)
parser.add_argument(
"--use_dora",
type=bool,
action="store_true",
default=False,
help=(
"Wether to train a DoRA as proposed in- DoRA: Weight-Decomposed Low-Rank Adaptation https://arxiv.org/abs/2402.09353. "
"Note: to use DoRA you need to install peft from main, `pip install git+https://github.com/huggingface/peft.git`"
),
)
parser.add_argument(
"--cache_latents",
action="store_true",
@@ -1333,7 +1323,6 @@ def main(args):
unet_lora_config = LoraConfig(
r=args.rank,
lora_alpha=args.rank,
use_dora=args.use_dora,
init_lora_weights="gaussian",
target_modules=["to_k", "to_q", "to_v", "to_out.0"],
)
@@ -1345,7 +1334,6 @@ def main(args):
text_lora_config = LoraConfig(
r=args.rank,
lora_alpha=args.rank,
use_dora=args.use_dora,
init_lora_weights="gaussian",
target_modules=["q_proj", "k_proj", "v_proj", "out_proj"],
)
-37
View File
@@ -206,40 +206,3 @@ You can explore the results from a couple of our internal experiments by checkin
## Running on a free-tier Colab Notebook
Check out [this notebook](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/SDXL_DreamBooth_LoRA_.ipynb).
## Conducting EDM-style training
It's now possible to perform EDM-style training as proposed in [Elucidating the Design Space of Diffusion-Based Generative Models](https://arxiv.org/abs/2206.00364).
For the SDXL model, simple set:
```diff
+ --do_edm_style_training \
```
Other SDXL-like models that use the EDM formulation, such as [playgroundai/playground-v2.5-1024px-aesthetic](https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic), can also be DreamBooth'd with the script. Below is an example command:
```bash
accelerate launch train_dreambooth_lora_sdxl.py \
--pretrained_model_name_or_path="playgroundai/playground-v2.5-1024px-aesthetic" \
--instance_data_dir="dog" \
--output_dir="dog-playground-lora" \
--mixed_precision="fp16" \
--instance_prompt="a photo of sks dog" \
--resolution=1024 \
--train_batch_size=1 \
--gradient_accumulation_steps=4 \
--learning_rate=1e-4 \
--use_8bit_adam \
--report_to="wandb" \
--lr_scheduler="constant" \
--lr_warmup_steps=0 \
--max_train_steps=500 \
--validation_prompt="A photo of sks dog in a bucket" \
--validation_epochs=25 \
--seed="0" \
--push_to_hub
```
> [!CAUTION]
> Min-SNR gamma is not supported with the EDM-style training yet. When training with the PlaygroundAI model, it's recommended to not pass any "variant".
@@ -1,99 +0,0 @@
# coding=utf-8
# Copyright 2024 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import sys
import tempfile
import safetensors
sys.path.append("..")
from test_examples_utils import ExamplesTestsAccelerate, run_command # noqa: E402
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
class DreamBoothLoRASDXLWithEDM(ExamplesTestsAccelerate):
def test_dreambooth_lora_sdxl_with_edm(self):
with tempfile.TemporaryDirectory() as tmpdir:
test_args = f"""
examples/dreambooth/train_dreambooth_lora_sdxl.py
--pretrained_model_name_or_path hf-internal-testing/tiny-stable-diffusion-xl-pipe
--do_edm_style_training
--instance_data_dir docs/source/en/imgs
--instance_prompt photo
--resolution 64
--train_batch_size 1
--gradient_accumulation_steps 1
--max_train_steps 2
--learning_rate 5.0e-04
--scale_lr
--lr_scheduler constant
--lr_warmup_steps 0
--output_dir {tmpdir}
""".split()
run_command(self._launch_args + test_args)
# save_pretrained smoke test
self.assertTrue(os.path.isfile(os.path.join(tmpdir, "pytorch_lora_weights.safetensors")))
# make sure the state_dict has the correct naming in the parameters.
lora_state_dict = safetensors.torch.load_file(os.path.join(tmpdir, "pytorch_lora_weights.safetensors"))
is_lora = all("lora" in k for k in lora_state_dict.keys())
self.assertTrue(is_lora)
# when not training the text encoder, all the parameters in the state dict should start
# with `"unet"` in their names.
starts_with_unet = all(key.startswith("unet") for key in lora_state_dict.keys())
self.assertTrue(starts_with_unet)
def test_dreambooth_lora_playground(self):
with tempfile.TemporaryDirectory() as tmpdir:
test_args = f"""
examples/dreambooth/train_dreambooth_lora_sdxl.py
--pretrained_model_name_or_path hf-internal-testing/tiny-playground-v2-5-pipe
--instance_data_dir docs/source/en/imgs
--instance_prompt photo
--resolution 64
--train_batch_size 1
--gradient_accumulation_steps 1
--max_train_steps 2
--learning_rate 5.0e-04
--scale_lr
--lr_scheduler constant
--lr_warmup_steps 0
--output_dir {tmpdir}
""".split()
run_command(self._launch_args + test_args)
# save_pretrained smoke test
self.assertTrue(os.path.isfile(os.path.join(tmpdir, "pytorch_lora_weights.safetensors")))
# make sure the state_dict has the correct naming in the parameters.
lora_state_dict = safetensors.torch.load_file(os.path.join(tmpdir, "pytorch_lora_weights.safetensors"))
is_lora = all("lora" in k for k in lora_state_dict.keys())
self.assertTrue(is_lora)
# when not training the text encoder, all the parameters in the state dict should start
# with `"unet"` in their names.
starts_with_unet = all(key.startswith("unet") for key in lora_state_dict.keys())
self.assertTrue(starts_with_unet)
+30 -172
View File
@@ -14,10 +14,8 @@
# See the License for the specific language governing permissions and
import argparse
import contextlib
import gc
import itertools
import json
import logging
import math
import os
@@ -34,7 +32,7 @@ import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import DistributedDataParallelKwargs, ProjectConfiguration, set_seed
from huggingface_hub import create_repo, hf_hub_download, upload_folder
from huggingface_hub import create_repo, upload_folder
from huggingface_hub.utils import insecure_hashlib
from packaging import version
from peft import LoraConfig, set_peft_model_state_dict
@@ -52,8 +50,6 @@ from diffusers import (
AutoencoderKL,
DDPMScheduler,
DPMSolverMultistepScheduler,
EDMEulerScheduler,
EulerDiscreteScheduler,
StableDiffusionXLPipeline,
UNet2DConditionModel,
)
@@ -80,20 +76,6 @@ check_min_version("0.27.0.dev0")
logger = get_logger(__name__)
def determine_scheduler_type(pretrained_model_name_or_path, revision):
model_index_filename = "model_index.json"
if os.path.isdir(pretrained_model_name_or_path):
model_index = os.path.join(pretrained_model_name_or_path, model_index_filename)
else:
model_index = hf_hub_download(
repo_id=pretrained_model_name_or_path, filename=model_index_filename, revision=revision
)
with open(model_index, "r") as f:
scheduler_type = json.load(f)["scheduler"][1]
return scheduler_type
def save_model_card(
repo_id: str,
images=None,
@@ -113,7 +95,7 @@ def save_model_card(
)
model_description = f"""
# {'SDXL' if 'playgroundai' not in base_model else 'Playground'} LoRA DreamBooth - {repo_id}
# SDXL LoRA DreamBooth - {repo_id}
<Gallery />
@@ -137,17 +119,11 @@ Weights for this model are available in Safetensors format.
[Download]({repo_id}/tree/main) them in the Files & versions tab.
"""
if "playgroundai" in args.pretrained_model_name_or_path:
model_description += """\n
## License
Please adhere to the licensing terms as described [here](https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic/blob/main/LICENSE.md).
"""
model_card = load_or_create_model_card(
repo_id_or_path=repo_id,
from_training=True,
license="openrail++" if "playgroundai" not in base_model else "playground-v2dot5-community",
license="openrail++",
base_model=base_model,
prompt=instance_prompt,
model_description=model_description,
@@ -155,17 +131,15 @@ Please adhere to the licensing terms as described [here](https://huggingface.co/
)
tags = [
"text-to-image",
"stable-diffusion-xl",
"stable-diffusion-xl-diffusers",
"text-to-image",
"diffusers",
"lora",
"template:sd-lora",
]
if "playgroundai" in base_model:
tags.extend(["playground", "playground-diffusers"])
else:
tags.extend(["stable-diffusion-xl", "stable-diffusion-xl-diffusers"])
model_card = populate_model_card(model_card, tags=tags)
model_card.save(os.path.join(repo_folder, "README.md"))
@@ -185,29 +159,23 @@ def log_validation(
# We train on the simplified learning objective. If we were previously predicting a variance, we need the scheduler to ignore it
scheduler_args = {}
if not args.do_edm_style_training:
if "variance_type" in pipeline.scheduler.config:
variance_type = pipeline.scheduler.config.variance_type
if "variance_type" in pipeline.scheduler.config:
variance_type = pipeline.scheduler.config.variance_type
if variance_type in ["learned", "learned_range"]:
variance_type = "fixed_small"
if variance_type in ["learned", "learned_range"]:
variance_type = "fixed_small"
scheduler_args["variance_type"] = variance_type
scheduler_args["variance_type"] = variance_type
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config, **scheduler_args)
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config, **scheduler_args)
pipeline = pipeline.to(accelerator.device)
pipeline.set_progress_bar_config(disable=True)
# run inference
generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) if args.seed else None
# Currently the context determination is a bit hand-wavy. We can improve it in the future if there's a better
# way to condition it. Reference: https://github.com/huggingface/diffusers/pull/7126#issuecomment-1968523051
inference_ctx = (
contextlib.nullcontext() if "playgroundai" in args.pretrained_model_name_or_path else torch.cuda.amp.autocast()
)
with inference_ctx:
with torch.cuda.amp.autocast():
images = [pipeline(**pipeline_args, generator=generator).images[0] for _ in range(args.num_validation_images)]
for tracker in accelerator.trackers:
@@ -366,12 +334,6 @@ def parse_args(input_args=None):
" `args.validation_prompt` multiple times: `args.num_validation_images`."
),
)
parser.add_argument(
"--do_edm_style_training",
default=False,
action="store_true",
help="Flag to conduct training using the EDM formulation as introduced in https://arxiv.org/abs/2206.00364.",
)
parser.add_argument(
"--with_prior_preservation",
default=False,
@@ -943,9 +905,6 @@ def main(args):
" Please use `huggingface-cli login` to authenticate with the Hub."
)
if args.do_edm_style_training and args.snr_gamma is not None:
raise ValueError("Min-SNR formulation is not supported when conducting EDM-style training.")
logging_dir = Path(args.output_dir, args.logging_dir)
accelerator_project_config = ProjectConfiguration(project_dir=args.output_dir, logging_dir=logging_dir)
@@ -1059,19 +1018,7 @@ def main(args):
)
# Load scheduler and models
scheduler_type = determine_scheduler_type(args.pretrained_model_name_or_path, args.revision)
if "EDM" in scheduler_type:
args.do_edm_style_training = True
noise_scheduler = EDMEulerScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
logger.info("Performing EDM-style training!")
elif args.do_edm_style_training:
noise_scheduler = EulerDiscreteScheduler.from_pretrained(
args.pretrained_model_name_or_path, subfolder="scheduler"
)
logger.info("Performing EDM-style training!")
else:
noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
text_encoder_one = text_encoder_cls_one.from_pretrained(
args.pretrained_model_name_or_path, subfolder="text_encoder", revision=args.revision, variant=args.variant
)
@@ -1089,12 +1036,6 @@ def main(args):
revision=args.revision,
variant=args.variant,
)
latents_mean = latents_std = None
if hasattr(vae.config, "latents_mean") and vae.config.latents_mean is not None:
latents_mean = torch.tensor(vae.config.latents_mean).view(1, 4, 1, 1)
if hasattr(vae.config, "latents_std") and vae.config.latents_std is not None:
latents_std = torch.tensor(vae.config.latents_std).view(1, 4, 1, 1)
unet = UNet2DConditionModel.from_pretrained(
args.pretrained_model_name_or_path, subfolder="unet", revision=args.revision, variant=args.variant
)
@@ -1237,7 +1178,7 @@ def main(args):
_set_state_dict_into_text_encoder(lora_state_dict, prefix="text_encoder.", text_encoder=text_encoder_one_)
_set_state_dict_into_text_encoder(
lora_state_dict, prefix="text_encoder_2.", text_encoder=text_encoder_two_
lora_state_dict, prefix="text_encoder_2.", text_encoder=text_encoder_one_
)
# Make sure the trainable params are in float32. This is again needed since the base models
@@ -1492,12 +1433,7 @@ def main(args):
# We need to initialize the trackers we use, and also store our configuration.
# The trackers initializes automatically on the main process.
if accelerator.is_main_process:
tracker_name = (
"dreambooth-lora-sd-xl"
if "playgroundai" not in args.pretrained_model_name_or_path
else "dreambooth-lora-playground"
)
accelerator.init_trackers(tracker_name, config=vars(args))
accelerator.init_trackers("dreambooth-lora-sd-xl", config=vars(args))
# Train!
total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
@@ -1549,18 +1485,6 @@ def main(args):
disable=not accelerator.is_local_main_process,
)
def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
sigmas = noise_scheduler.sigmas.to(device=accelerator.device, dtype=dtype)
schedule_timesteps = noise_scheduler.timesteps.to(accelerator.device)
timesteps = timesteps.to(accelerator.device)
step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
sigma = sigmas[step_indices].flatten()
while len(sigma.shape) < n_dim:
sigma = sigma.unsqueeze(-1)
return sigma
for epoch in range(first_epoch, args.num_train_epochs):
unet.train()
if args.train_text_encoder:
@@ -1588,46 +1512,22 @@ def main(args):
# Convert images to latent space
model_input = vae.encode(pixel_values).latent_dist.sample()
if latents_mean is None and latents_std is None:
model_input = model_input * vae.config.scaling_factor
if args.pretrained_vae_model_name_or_path is None:
model_input = model_input.to(weight_dtype)
else:
latents_mean = latents_mean.to(device=model_input.device, dtype=model_input.dtype)
latents_std = latents_std.to(device=model_input.device, dtype=model_input.dtype)
model_input = (model_input - latents_mean) * vae.config.scaling_factor / latents_std
model_input = model_input.to(dtype=weight_dtype)
model_input = model_input * vae.config.scaling_factor
if args.pretrained_vae_model_name_or_path is None:
model_input = model_input.to(weight_dtype)
# Sample noise that we'll add to the latents
noise = torch.randn_like(model_input)
bsz = model_input.shape[0]
# Sample a random timestep for each image
if not args.do_edm_style_training:
timesteps = torch.randint(
0, noise_scheduler.config.num_train_timesteps, (bsz,), device=model_input.device
)
timesteps = timesteps.long()
else:
# in EDM formulation, the model is conditioned on the pre-conditioned noise levels
# instead of discrete timesteps, so here we sample indices to get the noise levels
# from `scheduler.timesteps`
indices = torch.randint(0, noise_scheduler.config.num_train_timesteps, (bsz,))
timesteps = noise_scheduler.timesteps[indices].to(device=model_input.device)
timesteps = torch.randint(
0, noise_scheduler.config.num_train_timesteps, (bsz,), device=model_input.device
)
timesteps = timesteps.long()
# Add noise to the model input according to the noise magnitude at each timestep
# (this is the forward diffusion process)
noisy_model_input = noise_scheduler.add_noise(model_input, noise, timesteps)
# For EDM-style training, we first obtain the sigmas based on the continuous timesteps.
# We then precondition the final model inputs based on these sigmas instead of the timesteps.
# Follow: Section 5 of https://arxiv.org/abs/2206.00364.
if args.do_edm_style_training:
sigmas = get_sigmas(timesteps, len(noisy_model_input.shape), noisy_model_input.dtype)
if "EDM" in scheduler_type:
inp_noisy_latents = noise_scheduler.precondition_inputs(noisy_model_input, sigmas)
else:
inp_noisy_latents = noisy_model_input / ((sigmas**2 + 1) ** 0.5)
# time ids
add_time_ids = torch.cat(
@@ -1651,7 +1551,7 @@ def main(args):
}
prompt_embeds_input = prompt_embeds.repeat(elems_to_repeat_text_embeds, 1, 1)
model_pred = unet(
inp_noisy_latents if args.do_edm_style_training else noisy_model_input,
noisy_model_input,
timesteps,
prompt_embeds_input,
added_cond_kwargs=unet_added_conditions,
@@ -1670,43 +1570,18 @@ def main(args):
)
prompt_embeds_input = prompt_embeds.repeat(elems_to_repeat_text_embeds, 1, 1)
model_pred = unet(
inp_noisy_latents if args.do_edm_style_training else noisy_model_input,
noisy_model_input,
timesteps,
prompt_embeds_input,
added_cond_kwargs=unet_added_conditions,
return_dict=False,
)[0]
weighting = None
if args.do_edm_style_training:
# Similar to the input preconditioning, the model predictions are also preconditioned
# on noised model inputs (before preconditioning) and the sigmas.
# Follow: Section 5 of https://arxiv.org/abs/2206.00364.
if "EDM" in scheduler_type:
model_pred = noise_scheduler.precondition_outputs(noisy_model_input, model_pred, sigmas)
else:
if noise_scheduler.config.prediction_type == "epsilon":
model_pred = model_pred * (-sigmas) + noisy_model_input
elif noise_scheduler.config.prediction_type == "v_prediction":
model_pred = model_pred * (-sigmas / (sigmas**2 + 1) ** 0.5) + (
noisy_model_input / (sigmas**2 + 1)
)
# We are not doing weighting here because it tends result in numerical problems.
# See: https://github.com/huggingface/diffusers/pull/7126#issuecomment-1968523051
# There might be other alternatives for weighting as well:
# https://github.com/huggingface/diffusers/pull/7126#discussion_r1505404686
if "EDM" not in scheduler_type:
weighting = (sigmas**-2.0).float()
# Get the target for loss depending on the prediction type
if noise_scheduler.config.prediction_type == "epsilon":
target = model_input if args.do_edm_style_training else noise
target = noise
elif noise_scheduler.config.prediction_type == "v_prediction":
target = (
model_input
if args.do_edm_style_training
else noise_scheduler.get_velocity(model_input, noise, timesteps)
)
target = noise_scheduler.get_velocity(model_input, noise, timesteps)
else:
raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}")
@@ -1716,28 +1591,10 @@ def main(args):
target, target_prior = torch.chunk(target, 2, dim=0)
# Compute prior loss
if weighting is not None:
prior_loss = torch.mean(
(weighting.float() * (model_pred_prior.float() - target_prior.float()) ** 2).reshape(
target_prior.shape[0], -1
),
1,
)
prior_loss = prior_loss.mean()
else:
prior_loss = F.mse_loss(model_pred_prior.float(), target_prior.float(), reduction="mean")
prior_loss = F.mse_loss(model_pred_prior.float(), target_prior.float(), reduction="mean")
if args.snr_gamma is None:
if weighting is not None:
loss = torch.mean(
(weighting.float() * (model_pred.float() - target.float()) ** 2).reshape(
target.shape[0], -1
),
1,
)
loss = loss.mean()
else:
loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean")
loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean")
else:
# Compute loss-weights as per Section 3.4 of https://arxiv.org/abs/2303.09556.
# Since we predict the noise instead of x_0, the original formulation is slightly changed.
@@ -1839,6 +1696,7 @@ def main(args):
variant=args.variant,
torch_dtype=weight_dtype,
)
pipeline_args = {"prompt": args.validation_prompt}
images = log_validation(
@@ -35,7 +35,7 @@ import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import ProjectConfiguration, set_seed
from datasets import concatenate_datasets, load_dataset
from datasets import load_dataset
from huggingface_hub import create_repo, upload_folder
from packaging import version
from torchvision import transforms
@@ -895,20 +895,14 @@ def main(args):
# fingerprint used by the cache for the other processes to load the result
# details: https://github.com/huggingface/diffusers/pull/4038#discussion_r1266078401
new_fingerprint = Hasher.hash(args)
new_fingerprint_for_vae = Hasher.hash(vae_path)
train_dataset_with_embeddings = train_dataset.map(
compute_embeddings_fn, batched=True, new_fingerprint=new_fingerprint
)
train_dataset_with_vae = train_dataset.map(
new_fingerprint_for_vae = Hasher.hash("vae")
train_dataset = train_dataset.map(compute_embeddings_fn, batched=True, new_fingerprint=new_fingerprint)
train_dataset = train_dataset.map(
compute_vae_encodings_fn,
batched=True,
batch_size=args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps,
new_fingerprint=new_fingerprint_for_vae,
)
precomputed_dataset = concatenate_datasets(
[train_dataset_with_embeddings, train_dataset_with_vae.remove_columns(["image", "text"])], axis=1
)
precomputed_dataset = precomputed_dataset.with_transform(preprocess_train)
del text_encoders, tokenizers, vae
gc.collect()
@@ -931,7 +925,7 @@ def main(args):
# DataLoaders creation:
train_dataloader = torch.utils.data.DataLoader(
precomputed_dataset,
train_dataset,
shuffle=True,
collate_fn=collate_fn,
batch_size=args.train_batch_size,
@@ -982,7 +976,7 @@ def main(args):
total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
logger.info("***** Running training *****")
logger.info(f" Num examples = {len(precomputed_dataset)}")
logger.info(f" Num examples = {len(train_dataset)}")
logger.info(f" Num Epochs = {args.num_train_epochs}")
logger.info(f" Instantaneous batch size per device = {args.train_batch_size}")
logger.info(f" Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
@@ -2,6 +2,7 @@ accelerate>=0.16.0
torchvision
transformers>=4.25.1
wandb
huggingface-cli
bitsandbytes
deepspeed
peft>=0.6.0
-4
View File
@@ -25,7 +25,3 @@ skip-magic-trailing-comma = false
# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"
[tool.pytest.ini_options]
doctest_optionflags="NUMBER NORMALIZE_WHITESPACE ELLIPSIS"
doctest_glob="**/*.md"
+4 -4
View File
@@ -9,11 +9,11 @@ from diffusers import AutoencoderKL, DPMSolverMultistepScheduler, PixArtAlphaPip
ckpt_id = "PixArt-alpha/PixArt-alpha"
# https://github.com/PixArt-alpha/PixArt-alpha/blob/0f55e922376d8b797edd44d25d0e7464b260dcab/scripts/inference.py#L125
interpolation_scale = {256: 0.5, 512: 1, 1024: 2}
interpolation_scale = {512: 1, 1024: 2}
def main(args):
all_state_dict = torch.load(args.orig_ckpt_path, map_location="cpu")
all_state_dict = torch.load(args.orig_ckpt_path)
state_dict = all_state_dict.pop("state_dict")
converted_state_dict = {}
@@ -22,6 +22,7 @@ def main(args):
converted_state_dict["pos_embed.proj.bias"] = state_dict.pop("x_embedder.proj.bias")
# Caption projection.
converted_state_dict["caption_projection.y_embedding"] = state_dict.pop("y_embedder.y_embedding")
converted_state_dict["caption_projection.linear_1.weight"] = state_dict.pop("y_embedder.y_proj.fc1.weight")
converted_state_dict["caption_projection.linear_1.bias"] = state_dict.pop("y_embedder.y_proj.fc1.bias")
converted_state_dict["caption_projection.linear_2.weight"] = state_dict.pop("y_embedder.y_proj.fc2.weight")
@@ -154,7 +155,6 @@ def main(args):
assert transformer.pos_embed.pos_embed is not None
state_dict.pop("pos_embed")
state_dict.pop("y_embedder.y_embedding")
assert len(state_dict) == 0, f"State dict is not empty, {state_dict.keys()}"
num_model_params = sum(p.numel() for p in transformer.parameters())
@@ -187,7 +187,7 @@ if __name__ == "__main__":
"--image_size",
default=1024,
type=int,
choices=[256, 512, 1024],
choices=[512, 1024],
required=False,
help="Image size of pretrained model, either 512 or 1024.",
)
-185
View File
@@ -1,185 +0,0 @@
import doctest
import inspect
import os
import re
from typing import Iterable
from .utils import is_pytest_available
if is_pytest_available():
from _pytest.doctest import (
Module,
_get_checker,
_get_continue_on_failure,
_get_runner,
_is_mocked,
_patch_unwrap_mock_aware,
get_optionflags,
import_path,
)
from _pytest.outcomes import skip
from pytest import DoctestItem
else:
Module = object
DoctestItem = object
"""
The following contains utils to run the documentation tests without having to overwrite any files.
The `preprocess_string` function adds `# doctest: +IGNORE_RESULT` markers on the fly anywhere a `load_dataset` call is
made as a print would otherwise fail the corresonding line.
To skip cuda tests, make sure to call `SKIP_CUDA_DOCTEST=1 pytest --doctest-modules <path_to_files_to_test>
"""
def preprocess_string(string, skip_cuda_tests):
"""Prepare a docstring or a `.md` file to be run by doctest.
The argument `string` would be the whole file content if it is a `.md` file. For a python file, it would be one of
its docstring. In each case, it may contain multiple python code examples. If `skip_cuda_tests` is `True` and a
cuda stuff is detective (with a heuristic), this method will return an empty string so no doctest will be run for
`string`.
"""
codeblock_pattern = r"(```(?:python|py)\s*\n\s*>>> )((?:(?!```)[^])*?```)"
codeblocks = re.split(re.compile(codeblock_pattern, flags=re.MULTILINE | re.DOTALL), string)
is_cuda_found = False
for i, codeblock in enumerate(codeblocks):
if "load_dataset(" in codeblock and "# doctest: +IGNORE_RESULT" not in codeblock:
codeblocks[i] = re.sub(r"(>>> .*load_dataset\(.*)", r"\1 # doctest: +IGNORE_RESULT", codeblock)
if (
(">>>" in codeblock or "..." in codeblock)
and re.search(r"cuda|to\(0\)|device=0", codeblock)
and skip_cuda_tests
):
is_cuda_found = True
break
modified_string = ""
if not is_cuda_found:
modified_string = "".join(codeblocks)
return modified_string
class HfDocTestParser(doctest.DocTestParser):
"""
Overwrites the DocTestParser from doctest to properly parse the codeblocks that are formatted with black. This
means that there are no extra lines at the end of our snippets. The `# doctest: +IGNORE_RESULT` marker is also
added anywhere a `load_dataset` call is made as a print would otherwise fail the corresponding line.
Tests involving cuda are skipped base on a naive pattern that should be updated if it is not enough.
"""
# This regular expression is used to find doctest examples in a
# string. It defines three groups: `source` is the source code
# (including leading indentation and prompts); `indent` is the
# indentation of the first (PS1) line of the source code; and
# `want` is the expected output (including leading indentation).
# fmt: off
_EXAMPLE_RE = re.compile(r'''
# Source consists of a PS1 line followed by zero or more PS2 lines.
(?P<source>
(?:^(?P<indent> [ ]*) >>> .*) # PS1 line
(?:\n [ ]* \.\.\. .*)*) # PS2 lines
\n?
# Want consists of any non-blank lines that do not start with PS1.
(?P<want> (?:(?![ ]*$) # Not a blank line
(?![ ]*>>>) # Not a line starting with PS1
# !!!!!!!!!!! HF Specific !!!!!!!!!!!
(?:(?!```).)* # Match any character except '`' until a '```' is found (this is specific to HF because black removes the last line)
# !!!!!!!!!!! HF Specific !!!!!!!!!!!
(?:\n|$) # Match a new line or end of string
)*)
''', re.MULTILINE | re.VERBOSE
)
# fmt: on
# !!!!!!!!!!! HF Specific !!!!!!!!!!!
skip_cuda_tests: bool = bool(os.environ.get("SKIP_CUDA_DOCTEST", False))
# !!!!!!!!!!! HF Specific !!!!!!!!!!!
def parse(self, string, name="<string>"):
"""
Overwrites the `parse` method to incorporate a skip for CUDA tests, and remove logs and dataset prints before
calling `super().parse`
"""
string = preprocess_string(string, self.skip_cuda_tests)
return super().parse(string, name)
class HfDoctestModule(Module):
"""
Overwrites the `DoctestModule` of the pytest package to make sure the HFDocTestParser is used when discovering
tests.
"""
def collect(self) -> Iterable["DoctestItem"]:
class MockAwareDocTestFinder(doctest.DocTestFinder):
"""A hackish doctest finder that overrides stdlib internals to fix a stdlib bug.
https://github.com/pytest-dev/pytest/issues/3456 https://bugs.python.org/issue25532
"""
def _find_lineno(self, obj, source_lines):
"""Doctest code does not take into account `@property`, this
is a hackish way to fix it. https://bugs.python.org/issue17446
Wrapped Doctests will need to be unwrapped so the correct line number is returned. This will be
reported upstream. #8796
"""
if isinstance(obj, property):
obj = getattr(obj, "fget", obj)
if hasattr(obj, "__wrapped__"):
# Get the main obj in case of it being wrapped
obj = inspect.unwrap(obj)
# Type ignored because this is a private function.
return super()._find_lineno( # type:ignore[misc]
obj,
source_lines,
)
def _find(self, tests, obj, name, module, source_lines, globs, seen) -> None:
if _is_mocked(obj):
return
with _patch_unwrap_mock_aware():
# Type ignored because this is a private function.
super()._find( # type:ignore[misc]
tests, obj, name, module, source_lines, globs, seen
)
if self.path.name == "conftest.py":
module = self.config.pluginmanager._importconftest(
self.path,
self.config.getoption("importmode"),
rootpath=self.config.rootpath,
)
else:
try:
module = import_path(
self.path,
root=self.config.rootpath,
mode=self.config.getoption("importmode"),
)
except ImportError:
if self.config.getvalue("doctest_ignore_import_errors"):
skip("unable to import module %r" % self.path)
else:
raise
# !!!!!!!!!!! HF Specific !!!!!!!!!!!
finder = MockAwareDocTestFinder(parser=HfDocTestParser())
# !!!!!!!!!!! HF Specific !!!!!!!!!!!
optionflags = get_optionflags(self)
runner = _get_runner(
verbose=False,
optionflags=optionflags,
checker=_get_checker(),
continue_on_failure=_get_continue_on_failure(self.config),
)
for test in finder.find(module, module.__name__):
if test.examples: # skip empty doctests and cuda
yield DoctestItem.from_parent(self, name=test.name, runner=runner, dtest=test)
@@ -97,7 +97,6 @@ class Transformer2DModel(ModelMixin, ConfigMixin):
norm_eps: float = 1e-5,
attention_type: str = "default",
caption_channels: int = None,
interpolation_scale: float = None,
):
super().__init__()
self.use_linear_projection = use_linear_projection
@@ -169,9 +168,8 @@ class Transformer2DModel(ModelMixin, ConfigMixin):
self.width = sample_size
self.patch_size = patch_size
interpolation_scale = (
interpolation_scale if interpolation_scale is not None else max(self.config.sample_size // 64, 1)
)
interpolation_scale = self.config.sample_size // 64 # => 64 (= 512 pixart) has interpolation scale 1
interpolation_scale = max(interpolation_scale, 1)
self.pos_embed = PatchEmbed(
height=sample_size,
width=sample_size,
@@ -400,22 +400,15 @@ class AnimateDiffPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -516,9 +509,9 @@ class AnimateDiffPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
@@ -478,22 +478,15 @@ class AnimateDiffVideoToVideoPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -596,9 +589,9 @@ class AnimateDiffVideoToVideoPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
def get_timesteps(self, num_inference_steps, timesteps, strength, device):
@@ -510,22 +510,15 @@ class StableDiffusionControlNetPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -733,9 +726,9 @@ class StableDiffusionControlNetPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
def check_image(self, image, prompt, prompt_embeds):
@@ -503,22 +503,15 @@ class StableDiffusionControlNetImg2ImgPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -720,9 +713,9 @@ class StableDiffusionControlNetImg2ImgPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
# Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
@@ -628,22 +628,15 @@ class StableDiffusionControlNetInpaintPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -878,9 +871,9 @@ class StableDiffusionControlNetInpaintPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
# Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
@@ -537,22 +537,15 @@ class StableDiffusionXLControlNetInpaintPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -824,9 +817,9 @@ class StableDiffusionXLControlNetInpaintPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
def prepare_control_image(
@@ -515,22 +515,15 @@ class StableDiffusionXLControlNetPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -737,9 +730,9 @@ class StableDiffusionXLControlNetPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
# Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
@@ -567,22 +567,15 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -801,9 +794,9 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
# Copied from diffusers.pipelines.controlnet.pipeline_controlnet_sd_xl.StableDiffusionXLControlNetPipeline.check_image
@@ -453,22 +453,15 @@ class LatentConsistencyModelImg2ImgPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -654,9 +647,9 @@ class LatentConsistencyModelImg2ImgPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
@property
@@ -437,22 +437,15 @@ class LatentConsistencyModelPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -586,9 +579,9 @@ class LatentConsistencyModelPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
@property
+5 -12
View File
@@ -582,9 +582,9 @@ class PIAPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_ip_adapter_image_embeds
@@ -619,22 +619,15 @@ class PIAPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -133,42 +133,6 @@ ASPECT_RATIO_512_BIN = {
"4.0": [1024.0, 256.0],
}
ASPECT_RATIO_256_BIN = {
"0.25": [128.0, 512.0],
"0.28": [128.0, 464.0],
"0.32": [144.0, 448.0],
"0.33": [144.0, 432.0],
"0.35": [144.0, 416.0],
"0.4": [160.0, 400.0],
"0.42": [160.0, 384.0],
"0.48": [176.0, 368.0],
"0.5": [176.0, 352.0],
"0.52": [176.0, 336.0],
"0.57": [192.0, 336.0],
"0.6": [192.0, 320.0],
"0.68": [208.0, 304.0],
"0.72": [208.0, 288.0],
"0.78": [224.0, 288.0],
"0.82": [224.0, 272.0],
"0.88": [240.0, 272.0],
"0.94": [240.0, 256.0],
"1.0": [256.0, 256.0],
"1.07": [256.0, 240.0],
"1.13": [272.0, 240.0],
"1.21": [272.0, 224.0],
"1.29": [288.0, 224.0],
"1.38": [288.0, 208.0],
"1.46": [304.0, 208.0],
"1.67": [320.0, 192.0],
"1.75": [336.0, 192.0],
"2.0": [352.0, 176.0],
"2.09": [368.0, 176.0],
"2.4": [384.0, 160.0],
"2.5": [400.0, 160.0],
"3.0": [432.0, 144.0],
"4.0": [512.0, 128.0],
}
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.retrieve_timesteps
def retrieve_timesteps(
@@ -296,7 +260,6 @@ class PixArtAlphaPipeline(DiffusionPipeline):
prompt_attention_mask: Optional[torch.FloatTensor] = None,
negative_prompt_attention_mask: Optional[torch.FloatTensor] = None,
clean_caption: bool = False,
max_sequence_length: int = 120,
**kwargs,
):
r"""
@@ -321,9 +284,8 @@ class PixArtAlphaPipeline(DiffusionPipeline):
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
Pre-generated negative text embeddings. For PixArt-Alpha, it's should be the embeddings of the ""
string.
clean_caption (`bool`, defaults to `False`):
clean_caption (bool, defaults to `False`):
If `True`, the function will preprocess and clean the provided caption before encoding.
max_sequence_length (`int`, defaults to 120): Maximum sequence length to use for the prompt.
"""
if "mask_feature" in kwargs:
@@ -341,7 +303,7 @@ class PixArtAlphaPipeline(DiffusionPipeline):
batch_size = prompt_embeds.shape[0]
# See Section 3.1. of the paper.
max_length = max_sequence_length
max_length = 120
if prompt_embeds is None:
prompt = self._text_preprocessing(prompt, clean_caption=clean_caption)
@@ -726,7 +688,6 @@ class PixArtAlphaPipeline(DiffusionPipeline):
callback_steps: int = 1,
clean_caption: bool = True,
use_resolution_binning: bool = True,
max_sequence_length: int = 120,
**kwargs,
) -> Union[ImagePipelineOutput, Tuple]:
"""
@@ -796,7 +757,6 @@ class PixArtAlphaPipeline(DiffusionPipeline):
If set to `True`, the requested height and width are first mapped to the closest resolutions using
`ASPECT_RATIO_1024_BIN`. After the produced latents are decoded into images, they are resized back to
the requested resolution. Useful for generating non-square images.
max_sequence_length (`int` defaults to 120): Maximum sequence length to use with the `prompt`.
Examples:
@@ -812,14 +772,9 @@ class PixArtAlphaPipeline(DiffusionPipeline):
height = height or self.transformer.config.sample_size * self.vae_scale_factor
width = width or self.transformer.config.sample_size * self.vae_scale_factor
if use_resolution_binning:
if self.transformer.config.sample_size == 128:
aspect_ratio_bin = ASPECT_RATIO_1024_BIN
elif self.transformer.config.sample_size == 64:
aspect_ratio_bin = ASPECT_RATIO_512_BIN
elif self.transformer.config.sample_size == 32:
aspect_ratio_bin = ASPECT_RATIO_256_BIN
else:
raise ValueError("Invalid sample size")
aspect_ratio_bin = (
ASPECT_RATIO_1024_BIN if self.transformer.config.sample_size == 128 else ASPECT_RATIO_512_BIN
)
orig_height, orig_width = height, width
height, width = self.classify_height_width_bin(height, width, ratios=aspect_ratio_bin)
@@ -867,7 +822,6 @@ class PixArtAlphaPipeline(DiffusionPipeline):
prompt_attention_mask=prompt_attention_mask,
negative_prompt_attention_mask=negative_prompt_attention_mask,
clean_caption=clean_caption,
max_sequence_length=max_sequence_length,
)
if do_classifier_free_guidance:
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
@@ -520,22 +520,15 @@ class StableDiffusionPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -646,9 +639,9 @@ class StableDiffusionPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
@@ -564,22 +564,15 @@ class StableDiffusionImg2ImgPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -692,9 +685,9 @@ class StableDiffusionImg2ImgPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
def get_timesteps(self, num_inference_steps, strength, device):
@@ -636,22 +636,15 @@ class StableDiffusionInpaintPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -774,9 +767,9 @@ class StableDiffusionInpaintPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
def prepare_latents(
@@ -59,66 +59,6 @@ EXAMPLE_DOC_STRING = """
"""
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
"""
Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
"""
std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
# rescale the results from guidance (fixes overexposure)
noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
# mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images
noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg
return noise_cfg
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.retrieve_timesteps
def retrieve_timesteps(
scheduler,
num_inference_steps: Optional[int] = None,
device: Optional[Union[str, torch.device]] = None,
timesteps: Optional[List[int]] = None,
**kwargs,
):
"""
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
Args:
scheduler (`SchedulerMixin`):
The scheduler to get timesteps from.
num_inference_steps (`int`):
The number of diffusion steps used when generating samples with a pre-trained model. If used,
`timesteps` must be `None`.
device (`str` or `torch.device`, *optional*):
The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
timesteps (`List[int]`, *optional*):
Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
timestep spacing strategy of the scheduler is used. If `timesteps` is passed, `num_inference_steps`
must be `None`.
Returns:
`Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
second element is the number of inference steps.
"""
if timesteps is not None:
accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
if not accepts_timesteps:
raise ValueError(
f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
f" timestep schedules. Please check whether you are using the correct scheduler."
)
scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
timesteps = scheduler.timesteps
num_inference_steps = len(timesteps)
else:
scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
timesteps = scheduler.timesteps
return timesteps, num_inference_steps
@dataclass
class LDM3DPipelineOutput(BaseOutput):
"""
@@ -185,7 +125,6 @@ class StableDiffusionLDM3DPipeline(
model_cpu_offload_seq = "text_encoder->unet->vae"
_optional_components = ["safety_checker", "feature_extractor", "image_encoder"]
_exclude_from_cpu_offload = ["safety_checker"]
_callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
def __init__(
self,
@@ -503,22 +442,15 @@ class StableDiffusionLDM3DPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -621,9 +553,9 @@ class StableDiffusionLDM3DPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
@@ -643,66 +575,6 @@ class StableDiffusionLDM3DPipeline(
latents = latents * self.scheduler.init_noise_sigma
return latents
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
"""
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
Args:
timesteps (`torch.Tensor`):
generate embedding vectors at these timesteps
embedding_dim (`int`, *optional*, defaults to 512):
dimension of the embeddings to generate
dtype:
data type of the generated embeddings
Returns:
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
"""
assert len(w.shape) == 1
w = w * 1000.0
half_dim = embedding_dim // 2
emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
emb = w.to(dtype)[:, None] * emb[None, :]
emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
if embedding_dim % 2 == 1: # zero pad
emb = torch.nn.functional.pad(emb, (0, 1))
assert emb.shape == (w.shape[0], embedding_dim)
return emb
@property
def guidance_scale(self):
return self._guidance_scale
@property
def guidance_rescale(self):
return self._guidance_rescale
@property
def clip_skip(self):
return self._clip_skip
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
# corresponds to doing no classifier free guidance.
@property
def do_classifier_free_guidance(self):
return self._guidance_scale > 1 and self.unet.config.time_cond_proj_dim is None
@property
def cross_attention_kwargs(self):
return self._cross_attention_kwargs
@property
def num_timesteps(self):
return self._num_timesteps
@property
def interrupt(self):
return self._interrupt
@torch.no_grad()
@replace_example_docstring(EXAMPLE_DOC_STRING)
def __call__(
@@ -711,7 +583,6 @@ class StableDiffusionLDM3DPipeline(
height: Optional[int] = None,
width: Optional[int] = None,
num_inference_steps: int = 49,
timesteps: List[int] = None,
guidance_scale: float = 5.0,
negative_prompt: Optional[Union[str, List[str]]] = None,
num_images_per_prompt: Optional[int] = 1,
@@ -724,12 +595,10 @@ class StableDiffusionLDM3DPipeline(
ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
output_type: Optional[str] = "pil",
return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
callback_steps: int = 1,
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
guidance_rescale: float = 0.0,
clip_skip: Optional[int] = None,
callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
callback_on_step_end_tensor_inputs: List[str] = ["latents"],
**kwargs,
):
r"""
The call function to the pipeline for generation.
@@ -780,21 +649,18 @@ class StableDiffusionLDM3DPipeline(
return_dict (`bool`, *optional*, defaults to `True`):
Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
plain tuple.
callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at
every step.
cross_attention_kwargs (`dict`, *optional*):
A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
clip_skip (`int`, *optional*):
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
the output of the pre-final layer will be used for computing the prompt embeddings.
callback_on_step_end (`Callable`, *optional*):
A function that calls at the end of each denoising steps during the inference. The function is called
with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
`callback_on_step_end_tensor_inputs`.
callback_on_step_end_tensor_inputs (`List`, *optional*):
The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
`._callback_tensor_inputs` attribute of your pipeline class.
Examples:
Returns:
@@ -804,22 +670,6 @@ class StableDiffusionLDM3DPipeline(
second element is a list of `bool`s indicating whether the corresponding generated image contains
"not-safe-for-work" (nsfw) content.
"""
callback = kwargs.pop("callback", None)
callback_steps = kwargs.pop("callback_steps", None)
if callback is not None:
deprecate(
"callback",
"1.0.0",
"Passing `callback` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
)
if callback_steps is not None:
deprecate(
"callback_steps",
"1.0.0",
"Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
)
# 0. Default height and width to unet
height = height or self.unet.config.sample_size * self.vae_scale_factor
width = width or self.unet.config.sample_size * self.vae_scale_factor
@@ -835,15 +685,8 @@ class StableDiffusionLDM3DPipeline(
negative_prompt_embeds,
ip_adapter_image,
ip_adapter_image_embeds,
callback_on_step_end_tensor_inputs,
)
self._guidance_scale = guidance_scale
self._guidance_rescale = guidance_rescale
self._clip_skip = clip_skip
self._cross_attention_kwargs = cross_attention_kwargs
self._interrupt = False
# 2. Define call parameters
if prompt is not None and isinstance(prompt, str):
batch_size = 1
@@ -853,6 +696,10 @@ class StableDiffusionLDM3DPipeline(
batch_size = prompt_embeds.shape[0]
device = self._execution_device
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
# corresponds to doing no classifier free guidance.
do_classifier_free_guidance = guidance_scale > 1.0
if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
image_embeds = self.prepare_ip_adapter_image_embeds(
@@ -860,7 +707,7 @@ class StableDiffusionLDM3DPipeline(
ip_adapter_image_embeds,
device,
batch_size * num_images_per_prompt,
self.do_classifier_free_guidance,
do_classifier_free_guidance,
)
# 3. Encode input prompt
@@ -868,7 +715,7 @@ class StableDiffusionLDM3DPipeline(
prompt,
device,
num_images_per_prompt,
self.do_classifier_free_guidance,
do_classifier_free_guidance,
negative_prompt,
prompt_embeds=prompt_embeds,
negative_prompt_embeds=negative_prompt_embeds,
@@ -877,11 +724,12 @@ class StableDiffusionLDM3DPipeline(
# For classifier free guidance, we need to do two forward passes.
# Here we concatenate the unconditional and text embeddings into a single batch
# to avoid doing two forward passes
if self.do_classifier_free_guidance:
if do_classifier_free_guidance:
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
# 4. Prepare timesteps
timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
self.scheduler.set_timesteps(num_inference_steps, device=device)
timesteps = self.scheduler.timesteps
# 5. Prepare latent variables
num_channels_latents = self.unet.config.in_channels
@@ -902,24 +750,12 @@ class StableDiffusionLDM3DPipeline(
# 6.1 Add image embeds for IP-Adapter
added_cond_kwargs = {"image_embeds": image_embeds} if ip_adapter_image is not None else None
# 6.2 Optionally get Guidance Scale Embedding
timestep_cond = None
if self.unet.config.time_cond_proj_dim is not None:
guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt)
timestep_cond = self.get_guidance_scale_embedding(
guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim
).to(device=device, dtype=latents.dtype)
# 7. Denoising loop
num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
self._num_timesteps = len(timesteps)
with self.progress_bar(total=num_inference_steps) as progress_bar:
for i, t in enumerate(timesteps):
if self.interrupt:
continue
# expand the latents if we are doing classifier free guidance
latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
# predict the noise residual
@@ -927,34 +763,19 @@ class StableDiffusionLDM3DPipeline(
latent_model_input,
t,
encoder_hidden_states=prompt_embeds,
timestep_cond=timestep_cond,
cross_attention_kwargs=cross_attention_kwargs,
added_cond_kwargs=added_cond_kwargs,
return_dict=False,
)[0]
# perform guidance
if self.do_classifier_free_guidance:
if do_classifier_free_guidance:
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
# Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
# compute the previous noisy sample x_t -> x_t-1
latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
if callback_on_step_end is not None:
callback_kwargs = {}
for k in callback_on_step_end_tensor_inputs:
callback_kwargs[k] = locals()[k]
callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
latents = callback_outputs.pop("latents", latents)
prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
# call the callback, if provided
if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
progress_bar.update()
@@ -414,22 +414,15 @@ class StableDiffusionPanoramaPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -557,9 +550,9 @@ class StableDiffusionPanoramaPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
@@ -549,22 +549,15 @@ class StableDiffusionXLPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -678,9 +671,9 @@ class StableDiffusionXLPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
@@ -616,9 +616,9 @@ class StableDiffusionXLImg2ImgPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
def get_timesteps(self, num_inference_steps, strength, device, denoising_start=None):
@@ -782,22 +782,15 @@ class StableDiffusionXLImg2ImgPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -486,22 +486,15 @@ class StableDiffusionXLInpaintPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -858,9 +851,9 @@ class StableDiffusionXLInpaintPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
def prepare_latents(
@@ -563,22 +563,15 @@ class StableDiffusionXLAdapterPipeline(
image_embeds.append(single_image_embeds)
else:
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
else:
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
image_embeds.append(single_image_embeds)
return image_embeds
@@ -693,9 +686,9 @@ class StableDiffusionXLAdapterPipeline(
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
elif ip_adapter_image_embeds[0].ndim != 3:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
-1
View File
@@ -69,7 +69,6 @@ from .import_utils import (
is_note_seq_available,
is_onnx_available,
is_peft_available,
is_pytest_available,
is_scipy_available,
is_tensorboard_available,
is_torch_available,
-11
View File
@@ -278,13 +278,6 @@ try:
except importlib_metadata.PackageNotFoundError:
_peft_available = False
_pytest_available = importlib.util.find_spec("pytest") is not None
try:
_pytest_version = importlib_metadata.version("pytest")
logger.debug(f"Successfully imported pytest version {_pytest_version}")
except importlib_metadata.PackageNotFoundError:
_pytest_available = False
_torchvision_available = importlib.util.find_spec("torchvision") is not None
try:
_torchvision_version = importlib_metadata.version("torchvision")
@@ -381,10 +374,6 @@ def is_peft_available():
return _peft_available
def is_pytest_available():
return _pytest_available
def is_torchvision_available():
return _torchvision_available
+1 -1
View File
@@ -779,7 +779,7 @@ class CaptureLogger:
>>> logger = logging.get_logger("diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.py")
>>> with CaptureLogger(logger) as cl:
... logger.info(msg)
>>> assert cl.out, msg + \n
>>> assert cl.out, msg + "\n"
```
"""
-85
View File
@@ -1,85 +0,0 @@
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script is responsible for cleaning the list of doctests by making sure the entries all exist and are in
alphabetical order.
Usage (from the root of the repo):
Check that the doctest list is properly sorted and all files exist (used in `make repo-consistency`):
```bash
python utils/check_doctest_list.py
```
Auto-sort the doctest list if it is not properly sorted (used in `make fix-copies`):
```bash
python utils/check_doctest_list.py --fix_and_overwrite
```
"""
import argparse
import os
# All paths are set with the intent you should run this script from the root of the repo with the command
# python utils/check_doctest_list.py
REPO_PATH = "."
DOCTEST_FILE_PATHS = ["not_doctested.txt"]
def clean_doctest_list(doctest_file: str, overwrite: bool = False):
"""
Cleans the doctest in a given file.
Args:
doctest_file (`str`):
The path to the doctest file to check or clean.
overwrite (`bool`, *optional*, defaults to `False`):
Whether or not to fix problems. If `False`, will error when the file is not clean.
"""
non_existent_paths = []
all_paths = []
with open(doctest_file, "r", encoding="utf-8") as f:
for line in f:
line = line.strip().split(" ")[0]
path = os.path.join(REPO_PATH, line)
if not (os.path.isfile(path) or os.path.isdir(path)):
non_existent_paths.append(line)
all_paths.append(line)
if len(non_existent_paths) > 0:
non_existent_paths = "\n".join([f"- {f}" for f in non_existent_paths])
raise ValueError(f"`{doctest_file}` contains non-existent paths:\n{non_existent_paths}")
sorted_paths = sorted(all_paths)
if all_paths != sorted_paths:
if not overwrite:
raise ValueError(
f"Files in `{doctest_file}` are not in alphabetical order, run `make fix-copies` to fix "
"this automatically."
)
with open(doctest_file, "w", encoding="utf-8") as f:
f.write("\n".join(sorted_paths) + "\n")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
args = parser.parse_args()
for doctest_file in DOCTEST_FILE_PATHS:
doctest_file = os.path.join(REPO_PATH, "utils", doctest_file)
clean_doctest_list(doctest_file, args.fix_and_overwrite)
-100
View File
@@ -1,100 +0,0 @@
docs/source/en/training/create_dataset.md
docs/source/en/training/wuerstchen.md
docs/source/en/training/adapt_a_model.md
docs/source/en/training/text2image.md
docs/source/en/training/custom_diffusion.md
docs/source/en/training/sdxl.md
docs/source/en/training/unconditional_training.md
docs/source/en/training/overview.md
docs/source/en/training/t2i_adapters.md
docs/source/en/training/lcm_distill.md
docs/source/en/training/instructpix2pix.md
docs/source/en/training/kandinsky.md
docs/source/en/training/lora.md
docs/source/en/training/controlnet.md
docs/source/en/training/dreambooth.md
docs/source/en/training/ddpo.md
docs/source/en/training/text_inversion.md
docs/source/en/training/distributed_inference.md
docs/source/en/optimization/torch2.0.md
docs/source/en/optimization/coreml.md
docs/source/en/optimization/tome.md
docs/source/en/optimization/xformers.md
docs/source/en/optimization/deepcache.md
docs/source/en/optimization/fp16.md
docs/source/en/optimization/memory.md
docs/source/en/optimization/habana.md
docs/source/en/optimization/open_vino.md
docs/source/en/optimization/mps.md
docs/source/en/optimization/opt_overview.md
docs/source/en/optimization/onnx.md
docs/source/en/tutorials/basic_training.md
docs/source/ko/index.md
docs/source/ko/quicktour.md
docs/source/ko/in_translation.md
docs/source/ko/installation.md
docs/source/ko/stable_diffusion.md
docs/source/ko/training/create_dataset.md
docs/source/ko/training/wuerstchen.md
docs/source/ko/training/adapt_a_model.md
docs/source/ko/training/text2image.md
docs/source/ko/training/custom_diffusion.md
docs/source/ko/training/sdxl.md
docs/source/ko/training/unconditional_training.md
docs/source/ko/training/overview.md
docs/source/ko/training/t2i_adapters.md
docs/source/ko/training/lcm_distill.md
docs/source/ko/training/instructpix2pix.md
docs/source/ko/training/kandinsky.md
docs/source/ko/training/lora.md
docs/source/ko/training/controlnet.md
docs/source/ko/training/dreambooth.md
docs/source/ko/training/ddpo.md
docs/source/ko/training/text_inversion.md
docs/source/ko/training/distributed_inference.md
docs/source/ko/optimization/torch2.0.md
docs/source/ko/optimization/coreml.md
docs/source/ko/optimization/tome.md
docs/source/ko/optimization/xformers.md
docs/source/ko/optimization/deepcache.md
docs/source/ko/optimization/fp16.md
docs/source/ko/optimization/memory.md
docs/source/ko/optimization/habana.md
docs/source/ko/optimization/open_vino.md
docs/source/ko/optimization/mps.md
docs/source/ko/optimization/opt_overview.md
docs/source/ko/optimization/onnx.md
docs/source/ko/api/pipelines/stable_diffusion/stable_diffusion_xl.md
docs/source/ko/tutorials/basic_training.md
docs/source/ko/using-diffusers/loading.md
docs/source/ko/using-diffusers/unconditional_image_generation.md
docs/source/ko/using-diffusers/depth2img.md
docs/source/ko/using-diffusers/control_brightness.md
docs/source/ko/using-diffusers/contribute_pipeline.md
docs/source/ko/using-diffusers/img2img.md
docs/source/ko/using-diffusers/weighted_prompts.md
docs/source/ko/using-diffusers/schedulers.md
docs/source/ko/using-diffusers/custom_pipeline_examples.md
docs/source/ko/using-diffusers/using_safetensors.md
docs/source/ko/using-diffusers/reproducibility.md
docs/source/ko/using-diffusers/inpaint.md
docs/source/ko/using-diffusers/conditional_image_generation.md
docs/source/ko/using-diffusers/controlling_generation.md
docs/source/ko/using-diffusers/reusing_seeds.md
docs/source/ko/using-diffusers/textual_inversion_inference.md
docs/source/ko/using-diffusers/loading_overview.md
docs/source/ko/using-diffusers/custom_pipeline_overview.md
docs/source/ko/using-diffusers/other-formats.md
docs/source/ko/using-diffusers/stable_diffusion_jax_how_to.md
docs/source/ko/using-diffusers/pipeline_overview.md
docs/source/ko/using-diffusers/write_own_pipeline.md
docs/source/pt/index.md
docs/source/pt/quicktour.md
docs/source/pt/in_translation.md
docs/source/pt/installation.md
docs/source/pt/stable_diffusion.md
docs/source/ja/index.md
docs/source/ja/quicktour.md
docs/source/ja/in_translation.md
docs/source/ja/installation.md
docs/source/ja/stable_diffusion.md
-401
View File
@@ -1,401 +0,0 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import json
import math
import os
import re
import time
from fnmatch import fnmatch
from typing import Dict, List
import requests
from slack_sdk import WebClient
client = WebClient(token=os.environ["CI_SLACK_BOT_TOKEN"])
def handle_test_results(test_results):
expressions = test_results.split(" ")
failed = 0
success = 0
# When the output is short enough, the output is surrounded by = signs: "== OUTPUT =="
# When it is too long, those signs are not present.
time_spent = expressions[-2] if "=" in expressions[-1] else expressions[-1]
for i, expression in enumerate(expressions):
if "failed" in expression:
failed += int(expressions[i - 1])
if "passed" in expression:
success += int(expressions[i - 1])
return failed, success, time_spent
def extract_first_line_failure(failures_short_lines):
failures = {}
file = None
in_error = False
for line in failures_short_lines.split("\n"):
if re.search(r"_ \[doctest\]", line):
in_error = True
file = line.split(" ")[2]
elif in_error and not line.split(" ")[0].isdigit():
failures[file] = line
in_error = False
return failures
class Message:
def __init__(self, title: str, doc_test_results: Dict):
self.title = title
self._time_spent = doc_test_results["time_spent"].split(",")[0]
self.n_success = doc_test_results["success"]
self.n_failures = doc_test_results["failures"]
self.n_tests = self.n_success + self.n_failures
# Failures and success of the modeling tests
self.doc_test_results = doc_test_results
@property
def time(self) -> str:
time_spent = [self._time_spent]
total_secs = 0
for time in time_spent:
time_parts = time.split(":")
# Time can be formatted as xx:xx:xx, as .xx, or as x.xx if the time spent was less than a minute.
if len(time_parts) == 1:
time_parts = [0, 0, time_parts[0]]
hours, minutes, seconds = int(time_parts[0]), int(time_parts[1]), float(time_parts[2])
total_secs += hours * 3600 + minutes * 60 + seconds
hours, minutes, seconds = total_secs // 3600, (total_secs % 3600) // 60, total_secs % 60
return f"{int(hours)}h{int(minutes)}m{int(seconds)}s"
@property
def header(self) -> Dict:
return {"type": "header", "text": {"type": "plain_text", "text": self.title}}
@property
def no_failures(self) -> Dict:
return {
"type": "section",
"text": {
"type": "plain_text",
"text": f"🌞 There were no failures: all {self.n_tests} tests passed. The suite ran in {self.time}.",
"emoji": True,
},
"accessory": {
"type": "button",
"text": {"type": "plain_text", "text": "Check Action results", "emoji": True},
"url": f"https://github.com/huggingface/transformers/actions/runs/{os.environ['GITHUB_RUN_ID']}",
},
}
@property
def failures(self) -> Dict:
return {
"type": "section",
"text": {
"type": "plain_text",
"text": (
f"There were {self.n_failures} failures, out of {self.n_tests} tests.\nThe suite ran in"
f" {self.time}."
),
"emoji": True,
},
"accessory": {
"type": "button",
"text": {"type": "plain_text", "text": "Check Action results", "emoji": True},
"url": f"https://github.com/huggingface/transformers/actions/runs/{os.environ['GITHUB_RUN_ID']}",
},
}
@property
def category_failures(self) -> List[Dict]:
failure_blocks = []
MAX_ERROR_TEXT = 3000 - len("The following examples had failures:\n\n\n\n") - len("[Truncated]\n")
line_length = 40
category_failures = {k: v["failed"] for k, v in doc_test_results.items() if isinstance(v, dict)}
def single_category_failures(category, failures):
text = ""
if len(failures) == 0:
return ""
text += f"*{category} failures*:".ljust(line_length // 2).rjust(line_length // 2) + "\n"
for idx, failure in enumerate(failures):
new_text = text + f"`{failure}`\n"
if len(new_text) > MAX_ERROR_TEXT:
text = text + "[Truncated]\n"
break
text = new_text
return text
for category, failures in category_failures.items():
report = single_category_failures(category, failures)
if len(report) == 0:
continue
block = {
"type": "section",
"text": {
"type": "mrkdwn",
"text": f"The following examples had failures:\n\n\n{report}\n",
},
}
failure_blocks.append(block)
return failure_blocks
@property
def payload(self) -> str:
blocks = [self.header]
if self.n_failures > 0:
blocks.append(self.failures)
if self.n_failures > 0:
blocks.extend(self.category_failures)
if self.n_failures == 0:
blocks.append(self.no_failures)
return json.dumps(blocks)
@staticmethod
def error_out():
payload = [
{
"type": "section",
"text": {
"type": "plain_text",
"text": "There was an issue running the tests.",
},
"accessory": {
"type": "button",
"text": {"type": "plain_text", "text": "Check Action results", "emoji": True},
"url": f"https://github.com/huggingface/transformers/actions/runs/{os.environ['GITHUB_RUN_ID']}",
},
}
]
print("Sending the following payload")
print(json.dumps({"blocks": json.loads(payload)}))
client.chat_postMessage(
channel=os.environ["CI_SLACK_CHANNEL_ID_DAILY"],
text="There was an issue running the tests.",
blocks=payload,
)
def post(self):
print("Sending the following payload")
print(json.dumps({"blocks": json.loads(self.payload)}))
text = f"{self.n_failures} failures out of {self.n_tests} tests," if self.n_failures else "All tests passed."
self.thread_ts = client.chat_postMessage(
channel=os.environ["CI_SLACK_CHANNEL_ID_DAILY"],
blocks=self.payload,
text=text,
)
def get_reply_blocks(self, job_name, job_link, failures, text):
# `text` must be less than 3001 characters in Slack SDK
# keep some room for adding "[Truncated]" when necessary
MAX_ERROR_TEXT = 3000 - len("[Truncated]")
failure_text = ""
for key, value in failures.items():
new_text = failure_text + f"*{key}*\n_{value}_\n\n"
if len(new_text) > MAX_ERROR_TEXT:
# `failure_text` here has length <= 3000
failure_text = failure_text + "[Truncated]"
break
# `failure_text` here has length <= MAX_ERROR_TEXT
failure_text = new_text
title = job_name
content = {"type": "section", "text": {"type": "mrkdwn", "text": text}}
if job_link is not None:
content["accessory"] = {
"type": "button",
"text": {"type": "plain_text", "text": "GitHub Action job", "emoji": True},
"url": job_link,
}
return [
{"type": "header", "text": {"type": "plain_text", "text": title.upper(), "emoji": True}},
content,
{"type": "section", "text": {"type": "mrkdwn", "text": failure_text}},
]
def post_reply(self):
if self.thread_ts is None:
raise ValueError("Can only post reply if a post has been made.")
job_link = self.doc_test_results.pop("job_link")
self.doc_test_results.pop("failures")
self.doc_test_results.pop("success")
self.doc_test_results.pop("time_spent")
sorted_dict = sorted(self.doc_test_results.items(), key=lambda t: t[0])
for job, job_result in sorted_dict:
if len(job_result["failures"]):
text = f"*Num failures* :{len(job_result['failed'])} \n"
failures = job_result["failures"]
blocks = self.get_reply_blocks(job, job_link, failures, text=text)
print("Sending the following reply")
print(json.dumps({"blocks": blocks}))
client.chat_postMessage(
channel=os.environ["CI_SLACK_CHANNEL_ID_DAILY"],
text=f"Results for {job}",
blocks=blocks,
thread_ts=self.thread_ts["ts"],
)
time.sleep(1)
def get_job_links():
run_id = os.environ["GITHUB_RUN_ID"]
url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{run_id}/jobs?per_page=100"
result = requests.get(url).json()
jobs = {}
try:
jobs.update({job["name"]: job["html_url"] for job in result["jobs"]})
pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100)
for i in range(pages_to_iterate_over):
result = requests.get(url + f"&page={i + 2}").json()
jobs.update({job["name"]: job["html_url"] for job in result["jobs"]})
return jobs
except Exception as e:
print("Unknown error, could not fetch links.", e)
return {}
def retrieve_artifact(name: str):
_artifact = {}
if os.path.exists(name):
files = os.listdir(name)
for file in files:
try:
with open(os.path.join(name, file), encoding="utf-8") as f:
_artifact[file.split(".")[0]] = f.read()
except UnicodeDecodeError as e:
raise ValueError(f"Could not open {os.path.join(name, file)}.") from e
return _artifact
def retrieve_available_artifacts():
class Artifact:
def __init__(self, name: str):
self.name = name
self.paths = []
def __str__(self):
return self.name
def add_path(self, path: str):
self.paths.append({"name": self.name, "path": path})
_available_artifacts: Dict[str, Artifact] = {}
directories = filter(os.path.isdir, os.listdir())
for directory in directories:
artifact_name = directory
if artifact_name not in _available_artifacts:
_available_artifacts[artifact_name] = Artifact(artifact_name)
_available_artifacts[artifact_name].add_path(directory)
return _available_artifacts
if __name__ == "__main__":
github_actions_job_links = get_job_links()
available_artifacts = retrieve_available_artifacts()
docs = collections.OrderedDict(
[
("*.py", "API Examples"),
("*.md", "MD Examples"),
]
)
# This dict will contain all the information relative to each doc test category:
# - failed: list of failed tests
# - failures: dict in the format 'test': 'error_message'
doc_test_results = {
v: {
"failed": [],
"failures": {},
}
for v in docs.values()
}
# Link to the GitHub Action job
doc_test_results["job_link"] = github_actions_job_links.get("run_doctests")
artifact_path = available_artifacts["doc_tests_gpu_test_reports"].paths[0]
artifact = retrieve_artifact(artifact_path["name"])
if "stats" in artifact:
failed, success, time_spent = handle_test_results(artifact["stats"])
doc_test_results["failures"] = failed
doc_test_results["success"] = success
doc_test_results["time_spent"] = time_spent[1:-1] + ", "
all_failures = extract_first_line_failure(artifact["failures_short"])
for line in artifact["summary_short"].split("\n"):
if re.search("FAILED", line):
line = line.replace("FAILED ", "")
line = line.split()[0].replace("\n", "")
if "::" in line:
file_path, test = line.split("::")
else:
file_path, test = line, line
for file_regex in docs.keys():
if fnmatch(file_path, file_regex):
category = docs[file_regex]
doc_test_results[category]["failed"].append(test)
failure = all_failures[test] if test in all_failures else "N/A"
doc_test_results[category]["failures"][test] = failure
break
message = Message("🤗 Results of the doc tests.", doc_test_results)
message.post()
message.post_reply()
+2 -2
View File
@@ -39,7 +39,7 @@ def main():
open_issues = repo.get_issues(state="open")
for issue in open_issues:
labels = [label.name.lower() for label in issue.get_labels()]
labels = [label.name for label in issue.get_labels()]
if "stale" in labels:
comments = sorted(issue.get_comments(), key=lambda i: i.created_at, reverse=True)
last_comment = comments[0] if len(comments) > 0 else None
@@ -50,7 +50,7 @@ def main():
elif (
(dt.now(timezone.utc) - issue.updated_at).days > 23
and (dt.now(timezone.utc) - issue.created_at).days >= 30
and not any(label in LABELS_TO_EXEMPT for label in labels)
and not any(label.name.lower() in LABELS_TO_EXEMPT for label in labels)
):
# Post a Stalebot notification after 23 days of inactivity.
issue.create_comment(