Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f715227918 |
@@ -26,7 +26,6 @@ jobs:
|
||||
image-name:
|
||||
- diffusers-pytorch-cpu
|
||||
- diffusers-pytorch-cuda
|
||||
- diffusers-pytorch-compile-cuda
|
||||
- diffusers-flax-cpu
|
||||
- diffusers-flax-tpu
|
||||
- diffusers-onnxruntime-cpu
|
||||
|
||||
@@ -15,6 +15,7 @@ concurrency:
|
||||
env:
|
||||
DIFFUSERS_IS_CI: yes
|
||||
OMP_NUM_THREADS: 4
|
||||
HF_HOME: /mnt/cache
|
||||
MKL_NUM_THREADS: 4
|
||||
PYTEST_TIMEOUT: 60
|
||||
|
||||
|
||||
@@ -74,11 +74,11 @@ jobs:
|
||||
env:
|
||||
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
|
||||
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
|
||||
CUBLAS_WORKSPACE_CONFIG: :16:8
|
||||
CUBLAS_WORKSPACE_CONFIG: :16:8
|
||||
|
||||
run: |
|
||||
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
|
||||
-s -v -k "not Flax and not Onnx and not compile" \
|
||||
-s -v -k "not Flax and not Onnx" \
|
||||
--make-reports=tests_${{ matrix.config.report }} \
|
||||
tests/
|
||||
|
||||
@@ -113,50 +113,6 @@ jobs:
|
||||
name: ${{ matrix.config.report }}_test_reports
|
||||
path: reports
|
||||
|
||||
run_torch_compile_tests:
|
||||
name: PyTorch Compile CUDA tests
|
||||
|
||||
runs-on: docker-gpu
|
||||
|
||||
container:
|
||||
image: diffusers/diffusers-pytorch-compile-cuda
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
|
||||
|
||||
steps:
|
||||
- name: Checkout diffusers
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 2
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install -e .[quality,test,training]
|
||||
|
||||
- name: Environment
|
||||
run: |
|
||||
python utils/print_env.py
|
||||
|
||||
- name: Run example tests on GPU
|
||||
env:
|
||||
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
|
||||
run: |
|
||||
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
run: cat reports/tests_torch_compile_cuda_failures_short.txt
|
||||
|
||||
- name: Test suite reports artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: torch_compile_test_reports
|
||||
path: reports
|
||||
|
||||
run_examples_tests:
|
||||
name: Examples PyTorch CUDA tests on Ubuntu
|
||||
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04
|
||||
LABEL maintainer="Hugging Face"
|
||||
LABEL repository="diffusers"
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt update && \
|
||||
apt install -y bash \
|
||||
build-essential \
|
||||
git \
|
||||
git-lfs \
|
||||
curl \
|
||||
ca-certificates \
|
||||
libsndfile1-dev \
|
||||
libgl1 \
|
||||
python3.9 \
|
||||
python3-pip \
|
||||
python3.9-venv && \
|
||||
rm -rf /var/lib/apt/lists
|
||||
|
||||
# make sure to use venv
|
||||
RUN python3 -m venv /opt/venv
|
||||
ENV PATH="/opt/venv/bin:$PATH"
|
||||
|
||||
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
|
||||
python3 -m pip install --no-cache-dir \
|
||||
torch \
|
||||
torchvision \
|
||||
torchaudio \
|
||||
invisible_watermark && \
|
||||
python3 -m pip install --no-cache-dir \
|
||||
accelerate \
|
||||
datasets \
|
||||
hf-doc-builder \
|
||||
huggingface-hub \
|
||||
Jinja2 \
|
||||
librosa \
|
||||
numpy \
|
||||
scipy \
|
||||
tensorboard \
|
||||
transformers \
|
||||
omegaconf \
|
||||
pytorch-lightning \
|
||||
xformers
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
@@ -67,30 +67,30 @@ By default, `tqdm` progress bars are displayed during model download. [`logging.
|
||||
|
||||
## Base setters
|
||||
|
||||
[[autodoc]] utils.logging.set_verbosity_error
|
||||
[[autodoc]] logging.set_verbosity_error
|
||||
|
||||
[[autodoc]] utils.logging.set_verbosity_warning
|
||||
[[autodoc]] logging.set_verbosity_warning
|
||||
|
||||
[[autodoc]] utils.logging.set_verbosity_info
|
||||
[[autodoc]] logging.set_verbosity_info
|
||||
|
||||
[[autodoc]] utils.logging.set_verbosity_debug
|
||||
[[autodoc]] logging.set_verbosity_debug
|
||||
|
||||
## Other functions
|
||||
|
||||
[[autodoc]] utils.logging.get_verbosity
|
||||
[[autodoc]] logging.get_verbosity
|
||||
|
||||
[[autodoc]] utils.logging.set_verbosity
|
||||
[[autodoc]] logging.set_verbosity
|
||||
|
||||
[[autodoc]] utils.logging.get_logger
|
||||
[[autodoc]] logging.get_logger
|
||||
|
||||
[[autodoc]] utils.logging.enable_default_handler
|
||||
[[autodoc]] logging.enable_default_handler
|
||||
|
||||
[[autodoc]] utils.logging.disable_default_handler
|
||||
[[autodoc]] logging.disable_default_handler
|
||||
|
||||
[[autodoc]] utils.logging.enable_explicit_format
|
||||
[[autodoc]] logging.enable_explicit_format
|
||||
|
||||
[[autodoc]] utils.logging.reset_format
|
||||
[[autodoc]] logging.reset_format
|
||||
|
||||
[[autodoc]] utils.logging.enable_progress_bar
|
||||
[[autodoc]] logging.enable_progress_bar
|
||||
|
||||
[[autodoc]] utils.logging.disable_progress_bar
|
||||
[[autodoc]] logging.disable_progress_bar
|
||||
|
||||
@@ -34,7 +34,13 @@ Make sure to check out the Schedulers [guide](/using-diffusers/schedulers) to le
|
||||
- load_lora_weights
|
||||
- save_lora_weights
|
||||
|
||||
## StableDiffusionPipelineOutput
|
||||
[[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput
|
||||
|
||||
## StableDiffusionXLInstructPix2PixPipeline
|
||||
[[autodoc]] StableDiffusionXLInstructPix2PixPipeline
|
||||
- __call__
|
||||
- all
|
||||
|
||||
## StableDiffusionXLPipelineOutput
|
||||
[[autodoc]] pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput
|
||||
@@ -31,5 +31,5 @@ Make sure to check out the Schedulers [guide](/using-diffusers/schedulers) to le
|
||||
- __call__
|
||||
|
||||
## StableDiffusionSafePipelineOutput
|
||||
[[autodoc]] pipelines.semantic_stable_diffusion.pipeline_output.SemanticStableDiffusionPipelineOutput
|
||||
- all
|
||||
[[autodoc]] pipelines.semantic_stable_diffusion.SemanticStableDiffusionPipelineOutput
|
||||
- all
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
<img src="https://github.com/dome272/Wuerstchen/assets/61938694/0617c863-165a-43ee-9303-2a17299a0cf9">
|
||||
|
||||
[Würstchen: Efficient Pretraining of Text-to-Image Models](https://huggingface.co/papers/2306.00637) is by Pablo Pernias, Dominic Rampas, Mats L. Richter and Christopher Pal and Marc Aubreville.
|
||||
[Würstchen: Efficient Pretraining of Text-to-Image Models](https://huggingface.co/papers/2306.00637) is by Pablo Pernias, Dominic Rampas, and Marc Aubreville.
|
||||
|
||||
The abstract from the paper is:
|
||||
|
||||
@@ -134,16 +134,3 @@ The original codebase, as well as experimental ideas, can be found at [dome272/W
|
||||
[[autodoc]] WuerstchenDecoderPipeline
|
||||
- all
|
||||
- __call__
|
||||
|
||||
## Citation
|
||||
|
||||
```bibtex
|
||||
@misc{pernias2023wuerstchen,
|
||||
title={Wuerstchen: Efficient Pretraining of Text-to-Image Models},
|
||||
author={Pablo Pernias and Dominic Rampas and Mats L. Richter and Christopher Pal and Marc Aubreville},
|
||||
year={2023},
|
||||
eprint={2306.00637},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CV}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -3,7 +3,7 @@ import inspect
|
||||
from typing import Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from torch.nn import functional as F
|
||||
from torchvision import transforms
|
||||
|
||||
@@ -2,7 +2,7 @@ import inspect
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from torchvision import transforms
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import warnings
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from accelerate import Accelerator
|
||||
|
||||
@@ -2,7 +2,7 @@ import inspect
|
||||
from typing import Callable, List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ import re
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from packaging import version
|
||||
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
|
||||
|
||||
@@ -3,7 +3,7 @@ import re
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from packaging import version
|
||||
from transformers import CLIPImageProcessor, CLIPTokenizer
|
||||
|
||||
@@ -1029,7 +1029,7 @@ class SDXLLongPromptWeightingPipeline(DiffusionPipeline, FromSingleFileMixin, Lo
|
||||
Guidance rescale factor should fix overexposure when using zero terminal SNR.
|
||||
original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
||||
If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
|
||||
`original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
|
||||
`original_size` defaults to `(width, height)` if not specified. Part of SDXL's micro-conditioning as
|
||||
explained in section 2.2 of
|
||||
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
|
||||
crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
|
||||
@@ -1039,7 +1039,7 @@ class SDXLLongPromptWeightingPipeline(DiffusionPipeline, FromSingleFileMixin, Lo
|
||||
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
|
||||
target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
||||
For most cases, `target_size` should be set to the desired height and width of the generated image. If
|
||||
not specified it will default to `(height, width)`. Part of SDXL's micro-conditioning as explained in
|
||||
not specified it will default to `(width, height)`. Part of SDXL's micro-conditioning as explained in
|
||||
section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
|
||||
|
||||
Examples:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
|
||||
from diffusers import StableDiffusionImg2ImgPipeline
|
||||
|
||||
@@ -6,7 +6,7 @@ from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
import kornia
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from packaging import version
|
||||
from transformers import CLIPFeatureExtractor, CLIPVisionModelWithProjection
|
||||
|
||||
@@ -16,7 +16,7 @@ import inspect
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from packaging import version
|
||||
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
|
||||
|
||||
@@ -24,7 +24,7 @@ from typing import List, Optional, Union
|
||||
import numpy as np
|
||||
import onnx
|
||||
import onnx_graphsurgeon as gs
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import tensorrt as trt
|
||||
import torch
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
@@ -24,7 +24,7 @@ from typing import List, Optional, Union
|
||||
import numpy as np
|
||||
import onnx
|
||||
import onnx_graphsurgeon as gs
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import tensorrt as trt
|
||||
import torch
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import (
|
||||
CLIPImageProcessor,
|
||||
|
||||
@@ -16,7 +16,7 @@ import math
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import CLIPTextModel, CLIPTokenizer
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import inspect
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from torch.nn import functional as F
|
||||
from transformers import (
|
||||
|
||||
@@ -907,17 +907,10 @@ def main():
|
||||
|
||||
if args.snr_gamma is not None:
|
||||
snr = jnp.array(compute_snr(timesteps))
|
||||
base_weights = jnp.where(snr < args.snr_gamma, snr, jnp.ones_like(snr) * args.snr_gamma) / snr
|
||||
snr_loss_weights = jnp.where(snr < args.snr_gamma, snr, jnp.ones_like(snr) * args.snr_gamma) / snr
|
||||
if noise_scheduler.config.prediction_type == "v_prediction":
|
||||
snr_loss_weights = base_weights + 1
|
||||
else:
|
||||
# Epsilon and sample prediction use the base weights.
|
||||
snr_loss_weights = base_weights
|
||||
# For zero-terminal SNR, we have to handle the case where a sigma of Zero results in a Inf value.
|
||||
# When we run this, the MSE loss weights for this timestep is set unconditionally to 1.
|
||||
# If we do not run this, the loss value will go to NaN almost immediately, usually within one step.
|
||||
snr_loss_weights[snr == 0] = 1.0
|
||||
|
||||
# velocity objective prediction requires SNR weights to be floored to a min value of 1.
|
||||
snr_loss_weights = snr_loss_weights + 1
|
||||
loss = loss * snr_loss_weights
|
||||
|
||||
loss = loss.mean()
|
||||
|
||||
@@ -801,22 +801,9 @@ def main():
|
||||
# Since we predict the noise instead of x_0, the original formulation is slightly changed.
|
||||
# This is discussed in Section 4.2 of the same paper.
|
||||
snr = compute_snr(timesteps)
|
||||
base_weight = (
|
||||
mse_loss_weights = (
|
||||
torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(dim=1)[0] / snr
|
||||
)
|
||||
|
||||
if noise_scheduler.config.prediction_type == "v_prediction":
|
||||
# Velocity objective needs to be floored to an SNR weight of one.
|
||||
mse_loss_weights = base_weight + 1
|
||||
else:
|
||||
# Epsilon and sample both use the same loss weights.
|
||||
mse_loss_weights = base_weight
|
||||
|
||||
# For zero-terminal SNR, we have to handle the case where a sigma of Zero results in a Inf value.
|
||||
# When we run this, the MSE loss weights for this timestep is set unconditionally to 1.
|
||||
# If we do not run this, the loss value will go to NaN almost immediately, usually within one step.
|
||||
mse_loss_weights[snr == 0] = 1.0
|
||||
|
||||
# We first calculate the original loss. Then we mean over the non-batch dimensions and
|
||||
# rebalance the sample-wise losses with their respective loss weights.
|
||||
# Finally, we take the mean of the rebalanced loss.
|
||||
|
||||
@@ -654,22 +654,9 @@ def main():
|
||||
# Since we predict the noise instead of x_0, the original formulation is slightly changed.
|
||||
# This is discussed in Section 4.2 of the same paper.
|
||||
snr = compute_snr(timesteps)
|
||||
base_weight = (
|
||||
mse_loss_weights = (
|
||||
torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(dim=1)[0] / snr
|
||||
)
|
||||
|
||||
if noise_scheduler.config.prediction_type == "v_prediction":
|
||||
# Velocity objective needs to be floored to an SNR weight of one.
|
||||
mse_loss_weights = base_weight + 1
|
||||
else:
|
||||
# Epsilon and sample both use the same loss weights.
|
||||
mse_loss_weights = base_weight
|
||||
|
||||
# For zero-terminal SNR, we have to handle the case where a sigma of Zero results in a Inf value.
|
||||
# When we run this, the MSE loss weights for this timestep is set unconditionally to 1.
|
||||
# If we do not run this, the loss value will go to NaN almost immediately, usually within one step.
|
||||
mse_loss_weights[snr == 0] = 1.0
|
||||
|
||||
# We first calculate the original loss. Then we mean over the non-batch dimensions and
|
||||
# rebalance the sample-wise losses with their respective loss weights.
|
||||
# Finally, we take the mean of the rebalanced loss.
|
||||
|
||||
@@ -685,22 +685,9 @@ def main():
|
||||
# Since we predict the noise instead of x_0, the original formulation is slightly changed.
|
||||
# This is discussed in Section 4.2 of the same paper.
|
||||
snr = compute_snr(timesteps)
|
||||
base_weight = (
|
||||
mse_loss_weights = (
|
||||
torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(dim=1)[0] / snr
|
||||
)
|
||||
|
||||
if noise_scheduler.config.prediction_type == "v_prediction":
|
||||
# Velocity objective needs to be floored to an SNR weight of one.
|
||||
mse_loss_weights = base_weight + 1
|
||||
else:
|
||||
# Epsilon and sample both use the same loss weights.
|
||||
mse_loss_weights = base_weight
|
||||
|
||||
# For zero-terminal SNR, we have to handle the case where a sigma of Zero results in a Inf value.
|
||||
# When we run this, the MSE loss weights for this timestep is set unconditionally to 1.
|
||||
# If we do not run this, the loss value will go to NaN almost immediately, usually within one step.
|
||||
mse_loss_weights[snr == 0] = 1.0
|
||||
|
||||
# We first calculate the original loss. Then we mean over the non-batch dimensions and
|
||||
# rebalance the sample-wise losses with their respective loss weights.
|
||||
# Finally, we take the mean of the rebalanced loss.
|
||||
|
||||
@@ -833,22 +833,9 @@ def main():
|
||||
# Since we predict the noise instead of x_0, the original formulation is slightly changed.
|
||||
# This is discussed in Section 4.2 of the same paper.
|
||||
snr = compute_snr(timesteps)
|
||||
base_weight = (
|
||||
mse_loss_weights = (
|
||||
torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(dim=1)[0] / snr
|
||||
)
|
||||
|
||||
if noise_scheduler.config.prediction_type == "v_prediction":
|
||||
# Velocity objective needs to be floored to an SNR weight of one.
|
||||
mse_loss_weights = base_weight + 1
|
||||
else:
|
||||
# Epsilon and sample both use the same loss weights.
|
||||
mse_loss_weights = base_weight
|
||||
|
||||
# For zero-terminal SNR, we have to handle the case where a sigma of Zero results in a Inf value.
|
||||
# When we run this, the MSE loss weights for this timestep is set unconditionally to 1.
|
||||
# If we do not run this, the loss value will go to NaN almost immediately, usually within one step.
|
||||
mse_loss_weights[snr == 0] = 1.0
|
||||
|
||||
# We first calculate the original loss. Then we mean over the non-batch dimensions and
|
||||
# rebalance the sample-wise losses with their respective loss weights.
|
||||
# Finally, we take the mean of the rebalanced loss.
|
||||
|
||||
@@ -872,21 +872,12 @@ def main():
|
||||
# Since we predict the noise instead of x_0, the original formulation is slightly changed.
|
||||
# This is discussed in Section 4.2 of the same paper.
|
||||
snr = compute_snr(timesteps)
|
||||
base_weight = (
|
||||
mse_loss_weights = (
|
||||
torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(dim=1)[0] / snr
|
||||
)
|
||||
if noise_scheduler.config.prediction_type == "v_prediction":
|
||||
# velocity objective prediction requires SNR weights to be floored to a min value of 1.
|
||||
mse_loss_weights = base_weight + 1
|
||||
else:
|
||||
# Epsilon and sample prediction use the base weights.
|
||||
mse_loss_weights = base_weight
|
||||
|
||||
# For zero-terminal SNR, we have to handle the case where a sigma of Zero results in a Inf value.
|
||||
# When we run this, the MSE loss weights for this timestep is set unconditionally to 1.
|
||||
# If we do not run this, the loss value will go to NaN almost immediately, usually within one step.
|
||||
mse_loss_weights[snr == 0] = 1.0
|
||||
|
||||
mse_loss_weights = mse_loss_weights + 1
|
||||
# We first calculate the original loss. Then we mean over the non-batch dimensions and
|
||||
# rebalance the sample-wise losses with their respective loss weights.
|
||||
# Finally, we take the mean of the rebalanced loss.
|
||||
|
||||
@@ -952,22 +952,12 @@ def main():
|
||||
# Since we predict the noise instead of x_0, the original formulation is slightly changed.
|
||||
# This is discussed in Section 4.2 of the same paper.
|
||||
snr = compute_snr(timesteps)
|
||||
base_weight = (
|
||||
mse_loss_weights = (
|
||||
torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(dim=1)[0] / snr
|
||||
)
|
||||
|
||||
if noise_scheduler.config.prediction_type == "v_prediction":
|
||||
# Velocity objective needs to be floored to an SNR weight of one.
|
||||
mse_loss_weights = base_weight + 1
|
||||
else:
|
||||
# Epsilon and sample both use the same loss weights.
|
||||
mse_loss_weights = base_weight
|
||||
|
||||
# For zero-terminal SNR, we have to handle the case where a sigma of Zero results in a Inf value.
|
||||
# When we run this, the MSE loss weights for this timestep is set unconditionally to 1.
|
||||
# If we do not run this, the loss value will go to NaN almost immediately, usually within one step.
|
||||
mse_loss_weights[snr == 0] = 1.0
|
||||
|
||||
# velocity objective prediction requires SNR weights to be floored to a min value of 1.
|
||||
mse_loss_weights = mse_loss_weights + 1
|
||||
# We first calculate the original loss. Then we mean over the non-batch dimensions and
|
||||
# rebalance the sample-wise losses with their respective loss weights.
|
||||
# Finally, we take the mean of the rebalanced loss.
|
||||
|
||||
@@ -783,22 +783,12 @@ def main():
|
||||
# Since we predict the noise instead of x_0, the original formulation is slightly changed.
|
||||
# This is discussed in Section 4.2 of the same paper.
|
||||
snr = compute_snr(timesteps)
|
||||
base_weight = (
|
||||
mse_loss_weights = (
|
||||
torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(dim=1)[0] / snr
|
||||
)
|
||||
|
||||
if noise_scheduler.config.prediction_type == "v_prediction":
|
||||
# Velocity objective needs to be floored to an SNR weight of one.
|
||||
mse_loss_weights = base_weight + 1
|
||||
else:
|
||||
# Epsilon and sample both use the same loss weights.
|
||||
mse_loss_weights = base_weight
|
||||
|
||||
# For zero-terminal SNR, we have to handle the case where a sigma of Zero results in a Inf value.
|
||||
# When we run this, the MSE loss weights for this timestep is set unconditionally to 1.
|
||||
# If we do not run this, the loss value will go to NaN almost immediately, usually within one step.
|
||||
mse_loss_weights[snr == 0] = 1.0
|
||||
|
||||
# velocity objective prediction requires SNR weights to be floored to a min value of 1.
|
||||
mse_loss_weights = mse_loss_weights + 1
|
||||
# We first calculate the original loss. Then we mean over the non-batch dimensions and
|
||||
# rebalance the sample-wise losses with their respective loss weights.
|
||||
# Finally, we take the mean of the rebalanced loss.
|
||||
|
||||
@@ -1072,22 +1072,12 @@ def main(args):
|
||||
# Since we predict the noise instead of x_0, the original formulation is slightly changed.
|
||||
# This is discussed in Section 4.2 of the same paper.
|
||||
snr = compute_snr(timesteps)
|
||||
base_weight = (
|
||||
mse_loss_weights = (
|
||||
torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(dim=1)[0] / snr
|
||||
)
|
||||
|
||||
if noise_scheduler.config.prediction_type == "v_prediction":
|
||||
# Velocity objective needs to be floored to an SNR weight of one.
|
||||
mse_loss_weights = base_weight + 1
|
||||
else:
|
||||
# Epsilon and sample both use the same loss weights.
|
||||
mse_loss_weights = base_weight
|
||||
|
||||
# For zero-terminal SNR, we have to handle the case where a sigma of Zero results in a Inf value.
|
||||
# When we run this, the MSE loss weights for this timestep is set unconditionally to 1.
|
||||
# If we do not run this, the loss value will go to NaN almost immediately, usually within one step.
|
||||
mse_loss_weights[snr == 0] = 1.0
|
||||
|
||||
# velocity objective prediction requires SNR weights to be floored to a min value of 1.
|
||||
mse_loss_weights = mse_loss_weights + 1
|
||||
# We first calculate the original loss. Then we mean over the non-batch dimensions and
|
||||
# rebalance the sample-wise losses with their respective loss weights.
|
||||
# Finally, we take the mean of the rebalanced loss.
|
||||
|
||||
@@ -325,55 +325,6 @@ def parse_args(input_args=None):
|
||||
parser.add_argument(
|
||||
"--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--timestep_bias_strategy",
|
||||
type=str,
|
||||
default="none",
|
||||
choices=["earlier", "later", "range", "none"],
|
||||
help=(
|
||||
"The timestep bias strategy, which may help direct the model toward learning low or high frequency details."
|
||||
" Choices: ['earlier', 'later', 'range', 'none']."
|
||||
" The default is 'none', which means no bias is applied, and training proceeds normally."
|
||||
" The value of 'later' will increase the frequency of the model's final training timesteps."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--timestep_bias_multiplier",
|
||||
type=float,
|
||||
default=1.0,
|
||||
help=(
|
||||
"The multiplier for the bias. Defaults to 1.0, which means no bias is applied."
|
||||
" A value of 2.0 will double the weight of the bias, and a value of 0.5 will halve it."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--timestep_bias_begin",
|
||||
type=int,
|
||||
default=0,
|
||||
help=(
|
||||
"When using `--timestep_bias_strategy=range`, the beginning (inclusive) timestep to bias."
|
||||
" Defaults to zero, which equates to having no specific bias."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--timestep_bias_end",
|
||||
type=int,
|
||||
default=1000,
|
||||
help=(
|
||||
"When using `--timestep_bias_strategy=range`, the final timestep (inclusive) to bias."
|
||||
" Defaults to 1000, which is the number of timesteps that Stable Diffusion is trained on."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--timestep_bias_portion",
|
||||
type=float,
|
||||
default=0.25,
|
||||
help=(
|
||||
"The portion of timesteps to bias. Defaults to 0.25, which 25% of timesteps will be biased."
|
||||
" A value of 0.5 will bias one half of the timesteps. The value provided for `--timestep_bias_strategy` determines"
|
||||
" whether the biased portions are in the earlier or later timesteps."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--snr_gamma",
|
||||
type=float,
|
||||
@@ -528,47 +479,6 @@ def compute_vae_encodings(batch, vae):
|
||||
return {"model_input": model_input.cpu()}
|
||||
|
||||
|
||||
def generate_timestep_weights(args, num_timesteps):
|
||||
weights = torch.ones(num_timesteps)
|
||||
|
||||
# Determine the indices to bias
|
||||
num_to_bias = int(args.timestep_bias_portion * num_timesteps)
|
||||
|
||||
if args.timestep_bias_strategy == "later":
|
||||
bias_indices = slice(-num_to_bias, None)
|
||||
elif args.timestep_bias_strategy == "earlier":
|
||||
bias_indices = slice(0, num_to_bias)
|
||||
elif args.timestep_bias_strategy == "range":
|
||||
# Out of the possible 1000 timesteps, we might want to focus on eg. 200-500.
|
||||
range_begin = args.timestep_bias_begin
|
||||
range_end = args.timestep_bias_end
|
||||
if range_begin < 0:
|
||||
raise ValueError(
|
||||
"When using the range strategy for timestep bias, you must provide a beginning timestep greater or equal to zero."
|
||||
)
|
||||
if range_end > num_timesteps:
|
||||
raise ValueError(
|
||||
"When using the range strategy for timestep bias, you must provide an ending timestep smaller than the number of timesteps."
|
||||
)
|
||||
bias_indices = slice(range_begin, range_end)
|
||||
else: # 'none' or any other string
|
||||
return weights
|
||||
if args.timestep_bias_multiplier <= 0:
|
||||
return ValueError(
|
||||
"The parameter --timestep_bias_multiplier is not intended to be used to disable the training of specific timesteps."
|
||||
" If it was intended to disable timestep bias, use `--timestep_bias_strategy none` instead."
|
||||
" A timestep bias multiplier less than or equal to 0 is not allowed."
|
||||
)
|
||||
|
||||
# Apply the bias
|
||||
weights[bias_indices] *= args.timestep_bias_multiplier
|
||||
|
||||
# Normalize
|
||||
weights /= weights.sum()
|
||||
|
||||
return weights
|
||||
|
||||
|
||||
def main(args):
|
||||
logging_dir = Path(args.output_dir, args.logging_dir)
|
||||
|
||||
@@ -1025,18 +935,11 @@ def main(args):
|
||||
)
|
||||
|
||||
bsz = model_input.shape[0]
|
||||
if args.timestep_bias_strategy == "none":
|
||||
# Sample a random timestep for each image without bias.
|
||||
timesteps = torch.randint(
|
||||
0, noise_scheduler.config.num_train_timesteps, (bsz,), device=model_input.device
|
||||
)
|
||||
else:
|
||||
# Sample a random timestep for each image, potentially biased by the timestep weights.
|
||||
# Biasing the timestep weights allows us to spend less time training irrelevant timesteps.
|
||||
weights = generate_timestep_weights(args, noise_scheduler.config.num_train_timesteps).to(
|
||||
model_input.device
|
||||
)
|
||||
timesteps = torch.multinomial(weights, bsz, replacement=True).long()
|
||||
# Sample a random timestep for each image
|
||||
timesteps = torch.randint(
|
||||
0, noise_scheduler.config.num_train_timesteps, (bsz,), device=model_input.device
|
||||
)
|
||||
timesteps = timesteps.long()
|
||||
|
||||
# Add noise to the model input according to the noise magnitude at each timestep
|
||||
# (this is the forward diffusion process)
|
||||
@@ -1100,11 +1003,6 @@ def main(args):
|
||||
# Epsilon and sample both use the same loss weights.
|
||||
mse_loss_weights = base_weight
|
||||
|
||||
# For zero-terminal SNR, we have to handle the case where a sigma of Zero results in a Inf value.
|
||||
# When we run this, the MSE loss weights for this timestep is set unconditionally to 1.
|
||||
# If we do not run this, the loss value will go to NaN almost immediately, usually within one step.
|
||||
mse_loss_weights[snr == 0] = 1.0
|
||||
|
||||
# We first calculate the original loss. Then we mean over the non-batch dimensions and
|
||||
# rebalance the sample-wise losses with their respective loss weights.
|
||||
# Finally, we take the mean of the rebalanced loss.
|
||||
|
||||
@@ -3,7 +3,6 @@ __version__ = "0.22.0.dev0"
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from .utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
is_flax_available,
|
||||
@@ -415,7 +414,7 @@ except OptionalDependencyNotAvailable:
|
||||
else:
|
||||
_import_structure["pipelines"].extend(["MidiProcessor"])
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
from .configuration_utils import ConfigMixin
|
||||
|
||||
try:
|
||||
|
||||
@@ -16,7 +16,7 @@ import warnings
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from PIL import Image
|
||||
|
||||
@@ -48,7 +48,7 @@ class VaeImageProcessor(ConfigMixin):
|
||||
Resampling filter to use when resizing the image.
|
||||
do_normalize (`bool`, *optional*, defaults to `True`):
|
||||
Whether to normalize the image to [-1,1].
|
||||
do_binarize (`bool`, *optional*, defaults to `False`):
|
||||
do_binarize (`bool`, *optional*, defaults to `True`):
|
||||
Whether to binarize the image to 0/1.
|
||||
do_convert_rgb (`bool`, *optional*, defaults to be `False`):
|
||||
Whether to convert the images to RGB format.
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ..utils import DIFFUSERS_SLOW_IMPORT, _LazyModule, is_flax_available, is_torch_available
|
||||
from ..utils import _LazyModule, is_flax_available, is_torch_available
|
||||
|
||||
|
||||
_import_structure = {}
|
||||
@@ -43,7 +43,7 @@ if is_flax_available():
|
||||
_import_structure["vae_flax"] = ["FlaxAutoencoderKL"]
|
||||
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
if is_torch_available():
|
||||
from .adapter import MultiAdapter, T2IAdapter
|
||||
from .autoencoder_asym_kl import AsymmetricAutoencoderKL
|
||||
|
||||
@@ -252,10 +252,7 @@ class T2IAdapter(ModelMixin, ConfigMixin):
|
||||
elif adapter_type == "light_adapter":
|
||||
self.adapter = LightAdapter(in_channels, channels, num_res_blocks, downscale_factor)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unsupported adapter_type: '{adapter_type}'. Choose either 'full_adapter' or "
|
||||
"'full_adapter_xl' or 'light_adapter'."
|
||||
)
|
||||
raise ValueError(f"unknown adapter_type: {type}. Choose either 'full_adapter' or 'simple_adapter'")
|
||||
|
||||
def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
|
||||
return self.adapter(x)
|
||||
@@ -334,8 +331,8 @@ class FullAdapterXL(nn.Module):
|
||||
self.body.append(AdapterBlock(channels[i], channels[i], num_res_blocks))
|
||||
|
||||
self.body = nn.ModuleList(self.body)
|
||||
# XL has only one downsampling AdapterBlock.
|
||||
self.total_downscale_factor = downscale_factor * 2
|
||||
# XL has one fewer downsampling
|
||||
self.total_downscale_factor = downscale_factor * 2 ** (len(channels) - 2)
|
||||
|
||||
def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
|
||||
x = self.unshuffle(x)
|
||||
|
||||
@@ -945,9 +945,6 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
|
||||
is_adapter = mid_block_additional_residual is None and down_block_additional_residuals is not None
|
||||
|
||||
down_block_res_samples = (sample,)
|
||||
print("emb", emb.abs().sum())
|
||||
print("sample", sample.abs().sum())
|
||||
|
||||
for downsample_block in self.down_blocks:
|
||||
if hasattr(downsample_block, "has_cross_attention") and downsample_block.has_cross_attention:
|
||||
# For t2i-adapter CrossAttnDownBlock2D
|
||||
|
||||
@@ -134,18 +134,8 @@ class FlaxUNet2DConditionModel(nn.Module, FlaxModelMixin, ConfigMixin):
|
||||
|
||||
added_cond_kwargs = None
|
||||
if self.addition_embed_type == "text_time":
|
||||
# we retrieve the expected `text_embeds_dim` by first checking if the architecture is a refiner
|
||||
# or non-refiner architecture and then by "reverse-computing" from `projection_class_embeddings_input_dim`
|
||||
is_refiner = (
|
||||
5 * self.config.addition_time_embed_dim + self.config.cross_attention_dim
|
||||
== self.config.projection_class_embeddings_input_dim
|
||||
)
|
||||
num_micro_conditions = 5 if is_refiner else 6
|
||||
|
||||
text_embeds_dim = self.config.projection_class_embeddings_input_dim - (
|
||||
num_micro_conditions * self.config.addition_time_embed_dim
|
||||
)
|
||||
|
||||
# TODO: how to get this from the config? It's no longer cross_attention_dim
|
||||
text_embeds_dim = 1280
|
||||
time_ids_channels = self.projection_class_embeddings_input_dim - text_embeds_dim
|
||||
time_ids_dims = time_ids_channels // self.addition_time_embed_dim
|
||||
added_cond_kwargs = {
|
||||
@@ -377,11 +367,6 @@ class FlaxUNet2DConditionModel(nn.Module, FlaxModelMixin, ConfigMixin):
|
||||
sample = jnp.transpose(sample, (0, 2, 3, 1))
|
||||
sample = self.conv_in(sample)
|
||||
|
||||
if not isinstance(t_emb, jax._src.interpreters.partial_eval.DynamicJaxprTracer):
|
||||
import torch; import numpy as np
|
||||
print("t_emb", torch.from_numpy(np.asarray(t_emb)).abs().sum())
|
||||
print("sample", torch.from_numpy(np.asarray(sample)).abs().sum())
|
||||
|
||||
# 3. down
|
||||
down_block_res_samples = (sample,)
|
||||
for down_block in self.down_blocks:
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ..utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
get_objects_from_module,
|
||||
@@ -253,7 +252,7 @@ except OptionalDependencyNotAvailable:
|
||||
else:
|
||||
_import_structure["spectrogram_diffusion"] = ["MidiProcessor", "SpectrogramDiffusionPipeline"]
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
try:
|
||||
if not is_torch_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
get_objects_from_module,
|
||||
@@ -27,7 +26,7 @@ else:
|
||||
|
||||
_import_structure["pipeline_output"] = ["AltDiffusionPipelineOutput"]
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
try:
|
||||
if not (is_transformers_available() and is_torch_available()):
|
||||
raise OptionalDependencyNotAvailable()
|
||||
|
||||
@@ -29,8 +29,7 @@ from ...utils import deprecate, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
from .modeling_roberta_series import RobertaSeriesModelWithTransformation
|
||||
from .pipeline_output import AltDiffusionPipelineOutput
|
||||
from . import AltDiffusionPipelineOutput, RobertaSeriesModelWithTransformation
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
@@ -16,7 +16,7 @@ import inspect
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from packaging import version
|
||||
from transformers import CLIPImageProcessor, XLMRobertaTokenizer
|
||||
@@ -31,8 +31,7 @@ from ...utils import PIL_INTERPOLATION, deprecate, logging, replace_example_docs
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
from .modeling_roberta_series import RobertaSeriesModelWithTransformation
|
||||
from .pipeline_output import AltDiffusionPipelineOutput
|
||||
from . import AltDiffusionPipelineOutput, RobertaSeriesModelWithTransformation
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
@@ -2,7 +2,7 @@ from dataclasses import dataclass
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
|
||||
from ...utils import (
|
||||
BaseOutput,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import DIFFUSERS_SLOW_IMPORT, _LazyModule
|
||||
from ...utils import _LazyModule
|
||||
|
||||
|
||||
_import_structure = {
|
||||
@@ -8,7 +8,7 @@ _import_structure = {
|
||||
"pipeline_audio_diffusion": ["AudioDiffusionPipeline"],
|
||||
}
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
from .mel import Mel
|
||||
from .pipeline_audio_diffusion import AudioDiffusionPipeline
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
is_torch_available,
|
||||
@@ -26,7 +25,7 @@ else:
|
||||
_import_structure["pipeline_audioldm"] = ["AudioLDMPipeline"]
|
||||
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
try:
|
||||
if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.27.0")):
|
||||
raise OptionalDependencyNotAvailable()
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
get_objects_from_module,
|
||||
@@ -26,7 +25,7 @@ else:
|
||||
_import_structure["pipeline_audioldm2"] = ["AudioLDM2Pipeline"]
|
||||
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
try:
|
||||
if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.27.0")):
|
||||
raise OptionalDependencyNotAvailable()
|
||||
|
||||
@@ -38,7 +38,7 @@ from diffusers.utils import numpy_to_pil
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
import PIL.Image
|
||||
import PIL
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
# limitations under the License.
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import CLIPTokenizer
|
||||
|
||||
@@ -98,8 +98,6 @@ class BlipDiffusionPipeline(DiffusionPipeline):
|
||||
Position of the context token in the text encoder.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "qformer->text_encoder->unet->vae"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
tokenizer: CLIPTokenizer,
|
||||
@@ -157,9 +155,7 @@ class BlipDiffusionPipeline(DiffusionPipeline):
|
||||
latents = latents * self.scheduler.init_noise_sigma
|
||||
return latents
|
||||
|
||||
def encode_prompt(self, query_embeds, prompt, device=None):
|
||||
device = device or self._execution_device
|
||||
|
||||
def encode_prompt(self, query_embeds, prompt):
|
||||
# embeddings for prompt, with query_embeds as context
|
||||
max_len = self.text_encoder.text_model.config.max_position_embeddings
|
||||
max_len -= self.qformer.config.num_query_tokens
|
||||
@@ -170,7 +166,7 @@ class BlipDiffusionPipeline(DiffusionPipeline):
|
||||
truncation=True,
|
||||
max_length=max_len,
|
||||
return_tensors="pt",
|
||||
).to(device)
|
||||
).to(self.device)
|
||||
|
||||
batch_size = query_embeds.shape[0]
|
||||
ctx_begin_pos = [self.config.ctx_begin_pos] * batch_size
|
||||
@@ -253,12 +249,11 @@ class BlipDiffusionPipeline(DiffusionPipeline):
|
||||
Returns:
|
||||
[`~pipelines.ImagePipelineOutput`] or `tuple`
|
||||
"""
|
||||
device = self._execution_device
|
||||
|
||||
reference_image = self.image_processor.preprocess(
|
||||
reference_image, image_mean=self.config.mean, image_std=self.config.std, return_tensors="pt"
|
||||
)["pixel_values"]
|
||||
reference_image = reference_image.to(device)
|
||||
reference_image = reference_image.to(self.device)
|
||||
|
||||
if isinstance(prompt, str):
|
||||
prompt = [prompt]
|
||||
@@ -276,7 +271,7 @@ class BlipDiffusionPipeline(DiffusionPipeline):
|
||||
prompt_reps=prompt_reps,
|
||||
)
|
||||
query_embeds = self.get_query_embeddings(reference_image, source_subject_category)
|
||||
text_embeddings = self.encode_prompt(query_embeds, prompt, device)
|
||||
text_embeddings = self.encode_prompt(query_embeds, prompt)
|
||||
do_classifier_free_guidance = guidance_scale > 1.0
|
||||
if do_classifier_free_guidance:
|
||||
max_length = self.text_encoder.text_model.config.max_position_embeddings
|
||||
@@ -288,7 +283,7 @@ class BlipDiffusionPipeline(DiffusionPipeline):
|
||||
return_tensors="pt",
|
||||
)
|
||||
uncond_embeddings = self.text_encoder(
|
||||
input_ids=uncond_input.input_ids.to(device),
|
||||
input_ids=uncond_input.input_ids.to(self.device),
|
||||
ctx_embeddings=None,
|
||||
)[0]
|
||||
# For classifier free guidance, we need to do two forward passes.
|
||||
@@ -305,7 +300,7 @@ class BlipDiffusionPipeline(DiffusionPipeline):
|
||||
generator=generator,
|
||||
latents=latents,
|
||||
dtype=self.unet.dtype,
|
||||
device=device,
|
||||
device=self.device,
|
||||
)
|
||||
# set timesteps
|
||||
extra_set_kwargs = {}
|
||||
@@ -335,13 +330,9 @@ class BlipDiffusionPipeline(DiffusionPipeline):
|
||||
t,
|
||||
latents,
|
||||
)["prev_sample"]
|
||||
|
||||
image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
|
||||
image = self.image_processor.postprocess(image, output_type=output_type)
|
||||
|
||||
# Offload all models
|
||||
self.maybe_free_model_hooks()
|
||||
|
||||
if not return_dict:
|
||||
return (image,)
|
||||
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
_LazyModule,
|
||||
)
|
||||
|
||||
|
||||
_import_structure = {"pipeline_consistency_models": ["ConsistencyModelPipeline"]}
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
from .pipeline_consistency_models import ConsistencyModelPipeline
|
||||
|
||||
else:
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
get_objects_from_module,
|
||||
@@ -41,7 +40,7 @@ else:
|
||||
_import_structure["pipeline_flax_controlnet"] = ["FlaxStableDiffusionControlNetPipeline"]
|
||||
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
try:
|
||||
if not (is_transformers_available() and is_torch_available()):
|
||||
raise OptionalDependencyNotAvailable()
|
||||
|
||||
@@ -34,7 +34,7 @@ from ...utils import (
|
||||
)
|
||||
from ...utils.torch_utils import is_compiled_module, randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
|
||||
from ..stable_diffusion import StableDiffusionPipelineOutput
|
||||
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
from .multicontrolnet import MultiControlNetModel
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import CLIPTokenizer
|
||||
|
||||
@@ -107,8 +107,6 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
|
||||
Position of the context token in the text encoder.
|
||||
"""
|
||||
|
||||
model_cpu_offload_seq = "qformer->text_encoder->unet->vae"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
tokenizer: CLIPTokenizer,
|
||||
@@ -168,9 +166,7 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
|
||||
latents = latents * self.scheduler.init_noise_sigma
|
||||
return latents
|
||||
|
||||
def encode_prompt(self, query_embeds, prompt, device=None):
|
||||
device = device or self._execution_device
|
||||
|
||||
def encode_prompt(self, query_embeds, prompt):
|
||||
# embeddings for prompt, with query_embeds as context
|
||||
max_len = self.text_encoder.text_model.config.max_position_embeddings
|
||||
max_len -= self.qformer.config.num_query_tokens
|
||||
@@ -181,7 +177,7 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
|
||||
truncation=True,
|
||||
max_length=max_len,
|
||||
return_tensors="pt",
|
||||
).to(device)
|
||||
).to(self.device)
|
||||
|
||||
batch_size = query_embeds.shape[0]
|
||||
ctx_begin_pos = [self.config.ctx_begin_pos] * batch_size
|
||||
@@ -301,12 +297,11 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
|
||||
Returns:
|
||||
[`~pipelines.ImagePipelineOutput`] or `tuple`
|
||||
"""
|
||||
device = self._execution_device
|
||||
|
||||
reference_image = self.image_processor.preprocess(
|
||||
reference_image, image_mean=self.config.mean, image_std=self.config.std, return_tensors="pt"
|
||||
)["pixel_values"]
|
||||
reference_image = reference_image.to(device)
|
||||
reference_image = reference_image.to(self.device)
|
||||
|
||||
if isinstance(prompt, str):
|
||||
prompt = [prompt]
|
||||
@@ -324,7 +319,7 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
|
||||
prompt_reps=prompt_reps,
|
||||
)
|
||||
query_embeds = self.get_query_embeddings(reference_image, source_subject_category)
|
||||
text_embeddings = self.encode_prompt(query_embeds, prompt, device)
|
||||
text_embeddings = self.encode_prompt(query_embeds, prompt)
|
||||
# 3. unconditional embedding
|
||||
do_classifier_free_guidance = guidance_scale > 1.0
|
||||
if do_classifier_free_guidance:
|
||||
@@ -337,7 +332,7 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
|
||||
return_tensors="pt",
|
||||
)
|
||||
uncond_embeddings = self.text_encoder(
|
||||
input_ids=uncond_input.input_ids.to(device),
|
||||
input_ids=uncond_input.input_ids.to(self.device),
|
||||
ctx_embeddings=None,
|
||||
)[0]
|
||||
# For classifier free guidance, we need to do two forward passes.
|
||||
@@ -353,7 +348,7 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
|
||||
generator=generator,
|
||||
latents=latents,
|
||||
dtype=self.unet.dtype,
|
||||
device=device,
|
||||
device=self.device,
|
||||
)
|
||||
# set timesteps
|
||||
extra_set_kwargs = {}
|
||||
@@ -404,9 +399,6 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
|
||||
image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
|
||||
image = self.image_processor.postprocess(image, output_type=output_type)
|
||||
|
||||
# Offload all models
|
||||
self.maybe_free_model_hooks()
|
||||
|
||||
if not return_dict:
|
||||
return (image,)
|
||||
|
||||
|
||||
@@ -16,11 +16,13 @@ import inspect
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
|
||||
|
||||
from diffusers.pipelines.stable_diffusion_xl import StableDiffusionXLPipelineOutput
|
||||
|
||||
from ...image_processor import PipelineImageInput, VaeImageProcessor
|
||||
from ...loaders import FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin
|
||||
from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
|
||||
@@ -39,7 +41,6 @@ from ...utils import (
|
||||
)
|
||||
from ...utils.torch_utils import is_compiled_module, randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
|
||||
from .multicontrolnet import MultiControlNetModel
|
||||
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ from ...utils import (
|
||||
)
|
||||
from ...utils.torch_utils import is_compiled_module, randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
|
||||
from ..stable_diffusion_xl import StableDiffusionXLPipelineOutput
|
||||
|
||||
|
||||
if is_invisible_watermark_available():
|
||||
@@ -863,7 +863,7 @@ class StableDiffusionXLControlNetPipeline(
|
||||
The percentage of total steps at which the ControlNet stops applying.
|
||||
original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
||||
If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
|
||||
`original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
|
||||
`original_size` defaults to `(width, height)` if not specified. Part of SDXL's micro-conditioning as
|
||||
explained in section 2.2 of
|
||||
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
|
||||
crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
|
||||
@@ -873,7 +873,7 @@ class StableDiffusionXLControlNetPipeline(
|
||||
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
|
||||
target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
||||
For most cases, `target_size` should be set to the desired height and width of the generated image. If
|
||||
not specified it will default to `(height, width)`. Part of SDXL's micro-conditioning as explained in
|
||||
not specified it will default to `(width, height)`. Part of SDXL's micro-conditioning as explained in
|
||||
section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
|
||||
negative_original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
||||
To negatively condition the generation process based on a specific image resolution. Part of SDXL's
|
||||
|
||||
@@ -41,7 +41,7 @@ from ...utils import (
|
||||
)
|
||||
from ...utils.torch_utils import is_compiled_module, randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
|
||||
from ..stable_diffusion_xl import StableDiffusionXLPipelineOutput
|
||||
|
||||
|
||||
if is_invisible_watermark_available():
|
||||
@@ -1028,7 +1028,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
|
||||
The percentage of total steps at which the controlnet stops applying.
|
||||
original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
||||
If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
|
||||
`original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
|
||||
`original_size` defaults to `(width, height)` if not specified. Part of SDXL's micro-conditioning as
|
||||
explained in section 2.2 of
|
||||
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
|
||||
crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
|
||||
@@ -1038,7 +1038,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
|
||||
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
|
||||
target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
||||
For most cases, `target_size` should be set to the desired height and width of the generated image. If
|
||||
not specified it will default to `(height, width)`. Part of SDXL's micro-conditioning as explained in
|
||||
not specified it will default to `(width, height)`. Part of SDXL's micro-conditioning as explained in
|
||||
section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
|
||||
negative_original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
||||
To negatively condition the generation process based on a specific image resolution. Part of SDXL's
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import DIFFUSERS_SLOW_IMPORT, _LazyModule
|
||||
from ...utils import _LazyModule
|
||||
|
||||
|
||||
_import_structure = {"pipeline_dance_diffusion": ["DanceDiffusionPipeline"]}
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
from .pipeline_dance_diffusion import DanceDiffusionPipeline
|
||||
else:
|
||||
import sys
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import DIFFUSERS_SLOW_IMPORT, _LazyModule
|
||||
from ...utils import _LazyModule
|
||||
|
||||
|
||||
_import_structure = {"pipeline_ddim": ["DDIMPipeline"]}
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
from .pipeline_ddim import DDIMPipeline
|
||||
else:
|
||||
import sys
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
_LazyModule,
|
||||
)
|
||||
|
||||
|
||||
_import_structure = {"pipeline_ddpm": ["DDPMPipeline"]}
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
from .pipeline_ddpm import DDPMPipeline
|
||||
|
||||
else:
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
get_objects_from_module,
|
||||
@@ -43,7 +42,7 @@ else:
|
||||
_import_structure["watermark"] = ["IFWatermarker"]
|
||||
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
try:
|
||||
if not (is_transformers_available() and is_torch_available()):
|
||||
raise OptionalDependencyNotAvailable()
|
||||
|
||||
@@ -20,7 +20,7 @@ from ...utils import (
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from .pipeline_output import IFPipelineOutput
|
||||
from . import IFPipelineOutput
|
||||
from .safety_checker import IFSafetyChecker
|
||||
from .watermark import IFWatermarker
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import urllib.parse as ul
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
|
||||
|
||||
@@ -23,7 +23,7 @@ from ...utils import (
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from .pipeline_output import IFPipelineOutput
|
||||
from . import IFPipelineOutput
|
||||
from .safety_checker import IFSafetyChecker
|
||||
from .watermark import IFWatermarker
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import urllib.parse as ul
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
|
||||
@@ -24,7 +24,7 @@ from ...utils import (
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from .pipeline_output import IFPipelineOutput
|
||||
from . import IFPipelineOutput
|
||||
from .safety_checker import IFSafetyChecker
|
||||
from .watermark import IFWatermarker
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import urllib.parse as ul
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
|
||||
|
||||
@@ -23,7 +23,7 @@ from ...utils import (
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from .pipeline_output import IFPipelineOutput
|
||||
from . import IFPipelineOutput
|
||||
from .safety_checker import IFSafetyChecker
|
||||
from .watermark import IFWatermarker
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import urllib.parse as ul
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
|
||||
@@ -24,7 +24,7 @@ from ...utils import (
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from .pipeline_output import IFPipelineOutput
|
||||
from . import IFPipelineOutput
|
||||
from .safety_checker import IFSafetyChecker
|
||||
from .watermark import IFWatermarker
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import urllib.parse as ul
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
|
||||
@@ -23,7 +23,7 @@ from ...utils import (
|
||||
)
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from .pipeline_output import IFPipelineOutput
|
||||
from . import IFPipelineOutput
|
||||
from .safety_checker import IFSafetyChecker
|
||||
from .watermark import IFWatermarker
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ from dataclasses import dataclass
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
|
||||
from ...utils import BaseOutput
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import List
|
||||
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from PIL import Image
|
||||
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import DIFFUSERS_SLOW_IMPORT, _LazyModule
|
||||
from ...utils import _LazyModule
|
||||
|
||||
|
||||
_import_structure = {"pipeline_dit": ["DiTPipeline"]}
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
from .pipeline_dit import DiTPipeline
|
||||
|
||||
else:
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
get_objects_from_module,
|
||||
@@ -33,7 +32,7 @@ else:
|
||||
_import_structure["text_encoder"] = ["MultilingualCLIP"]
|
||||
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
try:
|
||||
if not (is_transformers_available() and is_torch_available()):
|
||||
raise OptionalDependencyNotAvailable()
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
# limitations under the License.
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import (
|
||||
CLIPImageProcessor,
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import (
|
||||
|
||||
@@ -16,7 +16,7 @@ from copy import deepcopy
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from packaging import version
|
||||
|
||||
@@ -16,7 +16,7 @@ from dataclasses import dataclass
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTokenizer, CLIPVisionModelWithProjection
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
get_objects_from_module,
|
||||
@@ -35,7 +34,7 @@ else:
|
||||
_import_structure["pipeline_kandinsky2_2_prior_emb2emb"] = ["KandinskyV22PriorEmb2EmbPipeline"]
|
||||
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
try:
|
||||
if not (is_transformers_available() and is_torch_available()):
|
||||
raise OptionalDependencyNotAvailable()
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTokenizer, CLIPVisionModelWithProjection
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from PIL import Image
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from PIL import Image
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ from copy import deepcopy
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from packaging import version
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTokenizer, CLIPVisionModelWithProjection
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTokenizer, CLIPVisionModelWithProjection
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
get_objects_from_module,
|
||||
@@ -25,7 +24,7 @@ else:
|
||||
_import_structure["pipeline_latent_diffusion_superresolution"] = ["LDMSuperResolutionPipeline"]
|
||||
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
try:
|
||||
if not (is_transformers_available() and is_torch_available()):
|
||||
raise OptionalDependencyNotAvailable()
|
||||
|
||||
+1
-1
@@ -2,7 +2,7 @@ import inspect
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
import torch.utils.checkpoint
|
||||
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import DIFFUSERS_SLOW_IMPORT, _LazyModule
|
||||
from ...utils import _LazyModule
|
||||
|
||||
|
||||
_import_structure = {"pipeline_latent_diffusion_uncond": ["LDMPipeline"]}
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
from .pipeline_latent_diffusion_uncond import LDMPipeline
|
||||
else:
|
||||
import sys
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
get_objects_from_module,
|
||||
@@ -25,7 +24,7 @@ else:
|
||||
_import_structure["pipeline_musicldm"] = ["MusicLDMPipeline"]
|
||||
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
try:
|
||||
if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.27.0")):
|
||||
raise OptionalDependencyNotAvailable()
|
||||
|
||||
@@ -6,7 +6,6 @@ import PIL
|
||||
from PIL import Image
|
||||
|
||||
from ...utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
get_objects_from_module,
|
||||
@@ -30,7 +29,7 @@ else:
|
||||
_import_structure["pipeline_paint_by_example"] = ["PaintByExamplePipeline"]
|
||||
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
try:
|
||||
if not (is_transformers_available() and is_torch_available()):
|
||||
raise OptionalDependencyNotAvailable()
|
||||
|
||||
@@ -16,7 +16,7 @@ import inspect
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import CLIPImageProcessor
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import flax
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
from flax.core.frozen_dict import FrozenDict
|
||||
from huggingface_hub import create_repo, snapshot_download
|
||||
from PIL import Image
|
||||
|
||||
@@ -26,7 +26,7 @@ from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from huggingface_hub import ModelCard, create_repo, hf_hub_download, model_info, snapshot_download
|
||||
from packaging import version
|
||||
@@ -670,98 +670,14 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
create_pr=create_pr,
|
||||
)
|
||||
|
||||
def to(self, *args, **kwargs):
|
||||
r"""
|
||||
Performs Pipeline dtype and/or device conversion. A torch.dtype and torch.device are inferred from the
|
||||
arguments of `self.to(*args, **kwargs).`
|
||||
|
||||
<Tip>
|
||||
|
||||
If the pipeline already has the correct torch.dtype and torch.device, then it is returned as is. Otherwise,
|
||||
the returned pipeline is a copy of self with the desired torch.dtype and torch.device.
|
||||
|
||||
</Tip>
|
||||
|
||||
|
||||
Here are the ways to call `to`:
|
||||
|
||||
- `to(dtype, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the specified
|
||||
[`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype)
|
||||
- `to(device, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the specified
|
||||
[`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device)
|
||||
- `to(device=None, dtype=None, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the
|
||||
specified [`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device) and
|
||||
[`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype)
|
||||
|
||||
Arguments:
|
||||
dtype (`torch.dtype`, *optional*):
|
||||
Returns a pipeline with the specified
|
||||
[`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype)
|
||||
device (`torch.Device`, *optional*):
|
||||
Returns a pipeline with the specified
|
||||
[`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device)
|
||||
silence_dtype_warnings (`str`, *optional*, defaults to `False`):
|
||||
Whether to omit warnings if the target `dtype` is not compatible with the target `device`.
|
||||
|
||||
Returns:
|
||||
[`DiffusionPipeline`]: The pipeline converted to specified `dtype` and/or `dtype`.
|
||||
"""
|
||||
|
||||
torch_dtype = kwargs.pop("torch_dtype", None)
|
||||
if torch_dtype is not None:
|
||||
deprecate("torch_dtype", "0.25.0", "")
|
||||
torch_device = kwargs.pop("torch_device", None)
|
||||
if torch_device is not None:
|
||||
deprecate("torch_device", "0.25.0", "")
|
||||
|
||||
dtype_kwarg = kwargs.pop("dtype", None)
|
||||
device_kwarg = kwargs.pop("device", None)
|
||||
silence_dtype_warnings = kwargs.pop("silence_dtype_warnings", False)
|
||||
|
||||
if torch_dtype is not None and dtype_kwarg is not None:
|
||||
raise ValueError(
|
||||
"You have passed both `torch_dtype` and `dtype` as a keyword argument. Please make sure to only pass `dtype`."
|
||||
)
|
||||
|
||||
dtype = torch_dtype or dtype_kwarg
|
||||
|
||||
if torch_device is not None and device_kwarg is not None:
|
||||
raise ValueError(
|
||||
"You have passed both `torch_device` and `device` as a keyword argument. Please make sure to only pass `device`."
|
||||
)
|
||||
|
||||
device = torch_device or device_kwarg
|
||||
|
||||
dtype_arg = None
|
||||
device_arg = None
|
||||
if len(args) == 1:
|
||||
if isinstance(args[0], torch.dtype):
|
||||
dtype_arg = args[0]
|
||||
else:
|
||||
device_arg = torch.device(args[0]) if args[0] is not None else None
|
||||
elif len(args) == 2:
|
||||
if isinstance(args[0], torch.dtype):
|
||||
raise ValueError(
|
||||
"When passing two arguments, make sure the first corresponds to `device` and the second to `dtype`."
|
||||
)
|
||||
device_arg = torch.device(args[0]) if args[0] is not None else None
|
||||
dtype_arg = args[1]
|
||||
elif len(args) > 2:
|
||||
raise ValueError("Please make sure to pass at most two arguments (`device` and `dtype`) `.to(...)`")
|
||||
|
||||
if dtype is not None and dtype_arg is not None:
|
||||
raise ValueError(
|
||||
"You have passed `dtype` both as an argument and as a keyword argument. Please only pass one of the two."
|
||||
)
|
||||
|
||||
dtype = dtype or dtype_arg
|
||||
|
||||
if device is not None and device_arg is not None:
|
||||
raise ValueError(
|
||||
"You have passed `device` both as an argument and as a keyword argument. Please only pass one of the two."
|
||||
)
|
||||
|
||||
device = device or device_arg
|
||||
def to(
|
||||
self,
|
||||
torch_device: Optional[Union[str, torch.device]] = None,
|
||||
torch_dtype: Optional[torch.dtype] = None,
|
||||
silence_dtype_warnings: bool = False,
|
||||
):
|
||||
if torch_device is None and torch_dtype is None:
|
||||
return self
|
||||
|
||||
# throw warning if pipeline is in "offloaded"-mode but user tries to manually set to GPU.
|
||||
def module_is_sequentially_offloaded(module):
|
||||
@@ -782,14 +698,14 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
pipeline_is_sequentially_offloaded = any(
|
||||
module_is_sequentially_offloaded(module) for _, module in self.components.items()
|
||||
)
|
||||
if pipeline_is_sequentially_offloaded and device and torch.device(device).type == "cuda":
|
||||
if pipeline_is_sequentially_offloaded and torch_device and torch.device(torch_device).type == "cuda":
|
||||
raise ValueError(
|
||||
"It seems like you have activated sequential model offloading by calling `enable_sequential_cpu_offload`, but are now attempting to move the pipeline to GPU. This is not compatible with offloading. Please, move your pipeline `.to('cpu')` or consider removing the move altogether if you use sequential offloading."
|
||||
)
|
||||
|
||||
# Display a warning in this case (the operation succeeds but the benefits are lost)
|
||||
pipeline_is_offloaded = any(module_is_offloaded(module) for _, module in self.components.items())
|
||||
if pipeline_is_offloaded and device and torch.device(device).type == "cuda":
|
||||
if pipeline_is_offloaded and torch_device and torch.device(torch_device).type == "cuda":
|
||||
logger.warning(
|
||||
f"It seems like you have activated model offloading by calling `enable_model_cpu_offload`, but are now manually moving the pipeline to GPU. It is strongly recommended against doing so as memory gains from offloading are likely to be lost. Offloading automatically takes care of moving the individual components {', '.join(self.components.keys())} to GPU when needed. To make sure offloading works as expected, you should consider moving the pipeline back to CPU: `pipeline.to('cpu')` or removing the move altogether if you use offloading."
|
||||
)
|
||||
@@ -802,26 +718,26 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
for module in modules:
|
||||
is_loaded_in_8bit = hasattr(module, "is_loaded_in_8bit") and module.is_loaded_in_8bit
|
||||
|
||||
if is_loaded_in_8bit and dtype is not None:
|
||||
if is_loaded_in_8bit and torch_dtype is not None:
|
||||
logger.warning(
|
||||
f"The module '{module.__class__.__name__}' has been loaded in 8bit and conversion to {torch_dtype} is not yet supported. Module is still in 8bit precision."
|
||||
)
|
||||
|
||||
if is_loaded_in_8bit and device is not None:
|
||||
if is_loaded_in_8bit and torch_device is not None:
|
||||
logger.warning(
|
||||
f"The module '{module.__class__.__name__}' has been loaded in 8bit and moving it to {torch_dtype} via `.to()` is not yet supported. Module is still on {module.device}."
|
||||
)
|
||||
else:
|
||||
module.to(device, dtype)
|
||||
module.to(torch_device, torch_dtype)
|
||||
|
||||
if (
|
||||
module.dtype == torch.float16
|
||||
and str(device) in ["cpu"]
|
||||
and str(torch_device) in ["cpu"]
|
||||
and not silence_dtype_warnings
|
||||
and not is_offloaded
|
||||
):
|
||||
logger.warning(
|
||||
"Pipelines loaded with `dtype=torch.float16` cannot run with `cpu` device. It"
|
||||
"Pipelines loaded with `torch_dtype=torch.float16` cannot run with `cpu` device. It"
|
||||
" is not recommended to move them to `cpu` as running them will fail. Please make"
|
||||
" sure to use an accelerator to run the pipeline in inference, due to the lack of"
|
||||
" support for`float16` operations on this device in PyTorch. Please, remove the"
|
||||
@@ -844,21 +760,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
|
||||
return torch.device("cpu")
|
||||
|
||||
@property
|
||||
def dtype(self) -> torch.dtype:
|
||||
r"""
|
||||
Returns:
|
||||
`torch.dtype`: The torch dtype on which the pipeline is located.
|
||||
"""
|
||||
module_names, _ = self._get_signature_keys(self)
|
||||
modules = [getattr(self, n, None) for n in module_names]
|
||||
modules = [m for m in modules if isinstance(m, torch.nn.Module)]
|
||||
|
||||
for module in modules:
|
||||
return module.dtype
|
||||
|
||||
return torch.float32
|
||||
|
||||
@classmethod
|
||||
def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
|
||||
r"""
|
||||
@@ -1033,11 +934,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
# 1. Download the checkpoints and configs
|
||||
# use snapshot download here to get it working from from_pretrained
|
||||
if not os.path.isdir(pretrained_model_name_or_path):
|
||||
if pretrained_model_name_or_path.count("/") > 1:
|
||||
raise ValueError(
|
||||
f'The provided pretrained_model_name_or_path "{pretrained_model_name_or_path}"'
|
||||
" is neither a valid local path nor a valid repo id. Please check the parameter."
|
||||
)
|
||||
cached_folder = cls.download(
|
||||
pretrained_model_name_or_path,
|
||||
cache_dir=cache_dir,
|
||||
@@ -1326,19 +1222,12 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
return torch.device(module._hf_hook.execution_device)
|
||||
return self.device
|
||||
|
||||
def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
|
||||
def enable_model_cpu_offload(self, gpu_id: int = 0, device: Union[torch.device, str] = "cuda"):
|
||||
r"""
|
||||
Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
|
||||
to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
|
||||
method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
|
||||
`enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
|
||||
|
||||
Arguments:
|
||||
gpu_id (`int`, *optional*):
|
||||
The ID of the accelerator that shall be used in inference. If not specified, it will default to 0.
|
||||
device (`torch.Device` or `str`, *optional*, defaults to "cuda"):
|
||||
The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
|
||||
default to "cuda".
|
||||
"""
|
||||
if self.model_cpu_offload_seq is None:
|
||||
raise ValueError(
|
||||
@@ -1350,20 +1239,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
else:
|
||||
raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
|
||||
|
||||
torch_device = torch.device(device)
|
||||
device_index = torch_device.index
|
||||
|
||||
if gpu_id is not None and device_index is not None:
|
||||
raise ValueError(
|
||||
f"You have passed both `gpu_id`={gpu_id} and an index as part of the passed device `device`={device}"
|
||||
f"Cannot pass both. Please make sure to either not define `gpu_id` or not pass the index as part of the device: `device`={torch_device.type}"
|
||||
)
|
||||
|
||||
# _offload_gpu_id should be set to passed gpu_id (or id in passed `device`) or default to previously set id or default to 0
|
||||
self._offload_gpu_id = gpu_id or torch_device.index or self._offload_gpu_id or 0
|
||||
|
||||
device_type = torch_device.type
|
||||
device = torch.device(f"{device_type}:{self._offload_gpu_id}")
|
||||
device = torch.device(f"cuda:{gpu_id}")
|
||||
|
||||
if self.device.type != "cpu":
|
||||
self.to("cpu", silence_dtype_warnings=True)
|
||||
@@ -1398,10 +1274,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
|
||||
def maybe_free_model_hooks(self):
|
||||
r"""
|
||||
Function that offloads all components, removes all model hooks that were added when using
|
||||
`enable_model_cpu_offload` and then applies them again. In case the model has not been offloaded this function
|
||||
is a no-op. Make sure to add this function to the end of the `__call__` function of your pipeline so that it
|
||||
functions correctly when applying enable_model_cpu_offload.
|
||||
TODO: Better doc string
|
||||
"""
|
||||
if not hasattr(self, "_all_hooks") or len(self._all_hooks) == 0:
|
||||
# `enable_model_cpu_offload` has not be called, so silently do nothing
|
||||
@@ -1415,40 +1288,21 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
# make sure the model is in the same state as before calling it
|
||||
self.enable_model_cpu_offload()
|
||||
|
||||
def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
|
||||
def enable_sequential_cpu_offload(self, gpu_id: int = 0, device: Union[torch.device, str] = "cuda"):
|
||||
r"""
|
||||
Offloads all models to CPU using 🤗 Accelerate, significantly reducing memory usage. When called, the state
|
||||
dicts of all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are saved to CPU
|
||||
and then moved to `torch.device('meta')` and loaded to GPU only when their specific submodule has its `forward`
|
||||
method called. Offloading happens on a submodule basis. Memory savings are higher than with
|
||||
`enable_model_cpu_offload`, but performance is lower.
|
||||
|
||||
Arguments:
|
||||
gpu_id (`int`, *optional*):
|
||||
The ID of the accelerator that shall be used in inference. If not specified, it will default to 0.
|
||||
device (`torch.Device` or `str`, *optional*, defaults to "cuda"):
|
||||
The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
|
||||
default to "cuda".
|
||||
"""
|
||||
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||
from accelerate import cpu_offload
|
||||
else:
|
||||
raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
|
||||
|
||||
torch_device = torch.device(device)
|
||||
device_index = torch_device.index
|
||||
|
||||
if gpu_id is not None and device_index is not None:
|
||||
raise ValueError(
|
||||
f"You have passed both `gpu_id`={gpu_id} and an index as part of the passed device `device`={device}"
|
||||
f"Cannot pass both. Please make sure to either not define `gpu_id` or not pass the index as part of the device: `device`={torch_device.type}"
|
||||
)
|
||||
|
||||
# _offload_gpu_id should be set to passed gpu_id (or id in passed `device`) or default to previously set id or default to 0
|
||||
self._offload_gpu_id = gpu_id or torch_device.index or self._offload_gpu_id or 0
|
||||
|
||||
device_type = torch_device.type
|
||||
device = torch.device(f"{device_type}:{self._offload_gpu_id}")
|
||||
if device == "cuda":
|
||||
device = torch.device(f"{device}:{gpu_id}")
|
||||
|
||||
if self.device.type != "cpu":
|
||||
self.to("cpu", silence_dtype_warnings=True)
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import DIFFUSERS_SLOW_IMPORT, _LazyModule
|
||||
from ...utils import _LazyModule
|
||||
|
||||
|
||||
_import_structure = {"pipeline_pndm": ["PNDMPipeline"]}
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
from .pipeline_pndm import PNDMPipeline
|
||||
else:
|
||||
import sys
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import DIFFUSERS_SLOW_IMPORT, _LazyModule
|
||||
from ...utils import _LazyModule
|
||||
|
||||
|
||||
_import_structure = {"pipeline_repaint": ["RePaintPipeline"]}
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
from .pipeline_repaint import RePaintPipeline
|
||||
|
||||
else:
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
|
||||
from ...models import UNet2DModel
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import DIFFUSERS_SLOW_IMPORT, _LazyModule
|
||||
from ...utils import _LazyModule
|
||||
|
||||
|
||||
_import_structure = {"pipeline_score_sde_ve": ["ScoreSdeVePipeline"]}
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
from .pipeline_score_sde_ve import ScoreSdeVePipeline
|
||||
|
||||
else:
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
get_objects_from_module,
|
||||
@@ -25,7 +24,7 @@ else:
|
||||
_import_structure["pipeline_semantic_stable_diffusion"] = ["SemanticStableDiffusionPipeline"]
|
||||
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
try:
|
||||
if not (is_transformers_available() and is_torch_available()):
|
||||
raise OptionalDependencyNotAvailable()
|
||||
|
||||
@@ -2,7 +2,7 @@ from dataclasses import dataclass
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
|
||||
from ...utils import BaseOutput
|
||||
|
||||
|
||||
+1
-1
@@ -12,7 +12,7 @@ from ...schedulers import KarrasDiffusionSchedulers
|
||||
from ...utils import deprecate, logging
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from .pipeline_output import SemanticStableDiffusionPipelineOutput
|
||||
from . import SemanticStableDiffusionPipelineOutput
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
DIFFUSERS_SLOW_IMPORT,
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
get_objects_from_module,
|
||||
@@ -35,7 +34,7 @@ else:
|
||||
"VoidNeRFModel",
|
||||
]
|
||||
|
||||
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
if TYPE_CHECKING:
|
||||
try:
|
||||
if not (is_transformers_available() and is_torch_available()):
|
||||
raise OptionalDependencyNotAvailable()
|
||||
|
||||
@@ -17,7 +17,7 @@ from dataclasses import dataclass
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import CLIPTextModelWithProjection, CLIPTokenizer
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ from dataclasses import dataclass
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import PIL
|
||||
import torch
|
||||
from transformers import CLIPImageProcessor, CLIPVisionModel
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user