Compare commits

..

1 Commits

Author SHA1 Message Date
Dhruv Nair 8ddd914933 update 2024-07-22 16:01:27 +00:00
58 changed files with 316 additions and 453 deletions
+2 -3
View File
@@ -19,11 +19,10 @@ jobs:
strategy:
fail-fast: false
max-parallel: 1
runs-on:
group: aws-g6-4xlarge-plus
runs-on: [single-gpu, nvidia-gpu, a10, ci]
container:
image: diffusers/diffusers-pytorch-compile-cuda
options: --shm-size "16gb" --ipc host --gpus 0
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
+84 -17
View File
@@ -7,7 +7,7 @@ on:
env:
DIFFUSERS_IS_CI: yes
HF_HUB_ENABLE_HF_TRANSFER: 1
HF_HOME: /mnt/cache
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
PYTEST_TIMEOUT: 600
@@ -27,6 +27,10 @@ jobs:
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.8"
- name: Install dependencies
run: |
pip install -e .
@@ -46,17 +50,16 @@ jobs:
path: reports
run_nightly_tests_for_torch_pipelines:
name: Nightly Torch Pipelines CUDA Tests
name: Torch Pipelines CUDA Nightly Tests
needs: setup_torch_cuda_pipeline_matrix
strategy:
fail-fast: false
max-parallel: 8
matrix:
module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
runs-on: [single-gpu, nvidia-gpu, t4, ci]
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus 0
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
@@ -64,16 +67,19 @@ jobs:
fetch-depth: 2
- name: NVIDIA-SMI
run: nvidia-smi
- name: Install dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
python -m uv pip install pytest-reportlog
- name: Environment
run: |
python utils/print_env.py
- name: Pipeline CUDA Test
- name: Nightly PyTorch CUDA checkpoint (pipelines) tests
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
@@ -84,36 +90,38 @@ jobs:
--make-reports=tests_pipeline_${{ matrix.module }}_cuda \
--report-log=tests_pipeline_${{ matrix.module }}_cuda.log \
tests/pipelines/${{ matrix.module }}
- name: Failure short reports
if: ${{ failure() }}
run: |
cat reports/tests_pipeline_${{ matrix.module }}_cuda_stats.txt
cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
with:
name: pipeline_${{ matrix.module }}_test_reports
path: reports
- name: Generate Report and Notify Channel
if: always()
run: |
pip install slack_sdk tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
run_nightly_tests_for_other_torch_modules:
name: Nightly Torch CUDA Tests
name: Torch Non-Pipelines CUDA Nightly Tests
runs-on: [single-gpu, nvidia-gpu, t4, ci]
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus 0
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
defaults:
run:
shell: bash
strategy:
matrix:
max-parallel: 2
module: [models, schedulers, lora, others, single_file, examples]
module: [models, schedulers, others, examples]
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
@@ -125,8 +133,8 @@ jobs:
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
python -m uv pip install peft@git+https://github.com/huggingface/peft.git
python -m uv pip install pytest-reportlog
- name: Environment
run: python utils/print_env.py
@@ -150,6 +158,7 @@ jobs:
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8
run: |
python -m uv pip install peft@git+https://github.com/huggingface/peft.git
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-s -v --make-reports=examples_torch_cuda \
--report-log=examples_torch_cuda.log \
@@ -172,7 +181,64 @@ jobs:
if: always()
run: |
pip install slack_sdk tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
run_lora_nightly_tests:
name: Nightly LoRA Tests with PEFT and TORCH
runs-on: [single-gpu, nvidia-gpu, t4, ci]
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0
defaults:
run:
shell: bash
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
python -m uv pip install peft@git+https://github.com/huggingface/peft.git
python -m uv pip install pytest-reportlog
- name: Environment
run: python utils/print_env.py
- name: Run nightly LoRA tests with PEFT and Torch
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8
run: |
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-s -v -k "not Flax and not Onnx" \
--make-reports=tests_torch_lora_cuda \
--report-log=tests_torch_lora_cuda.log \
tests/lora
- name: Failure short reports
if: ${{ failure() }}
run: |
cat reports/tests_torch_lora_cuda_stats.txt
cat reports/tests_torch_lora_cuda_failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
with:
name: torch_lora_cuda_test_reports
path: reports
- name: Generate Report and Notify Channel
if: always()
run: |
pip install slack_sdk tabulate
python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
run_flax_tpu_tests:
name: Nightly Flax TPU Tests
@@ -228,14 +294,14 @@ jobs:
if: always()
run: |
pip install slack_sdk tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
run_nightly_onnx_tests:
name: Nightly ONNXRuntime CUDA tests on Ubuntu
runs-on: [single-gpu, nvidia-gpu, t4, ci]
container:
image: diffusers/diffusers-onnxruntime-cuda
options: --gpus 0 --shm-size "16gb" --ipc host
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
steps:
- name: Checkout diffusers
@@ -252,10 +318,11 @@ jobs:
python -m uv pip install -e [quality,test]
python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
python -m uv pip install pytest-reportlog
- name: Environment
run: python utils/print_env.py
- name: Run Nightly ONNXRuntime CUDA tests
- name: Run nightly ONNXRuntime CUDA tests
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
@@ -282,7 +349,7 @@ jobs:
if: always()
run: |
pip install slack_sdk tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
run_nightly_tests_apple_m1:
name: Nightly PyTorch MPS tests on MacOS
@@ -344,4 +411,4 @@ jobs:
if: always()
run: |
pip install slack_sdk tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
+64 -8
View File
@@ -11,9 +11,11 @@ on:
env:
DIFFUSERS_IS_CI: yes
HF_HOME: /mnt/cache
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
PYTEST_TIMEOUT: 600
RUN_SLOW: yes
PIPELINE_USAGE_CUTOFF: 50000
jobs:
@@ -50,7 +52,7 @@ jobs:
path: reports
torch_pipelines_cuda_tests:
name: Torch Pipelines CUDA Tests
name: Torch Pipelines CUDA Slow Tests
needs: setup_torch_cuda_pipeline_matrix
strategy:
fail-fast: false
@@ -60,7 +62,7 @@ jobs:
runs-on: [single-gpu, nvidia-gpu, t4, ci]
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus 0
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
@@ -104,7 +106,7 @@ jobs:
runs-on: [single-gpu, nvidia-gpu, t4, ci]
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host --gpus 0
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0
defaults:
run:
shell: bash
@@ -122,13 +124,12 @@ jobs:
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
python -m uv pip install peft@git+https://github.com/huggingface/peft.git
- name: Environment
run: |
python utils/print_env.py
- name: Run PyTorch CUDA tests
- name: Run slow PyTorch CUDA tests
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
@@ -152,6 +153,61 @@ jobs:
name: torch_cuda_test_reports
path: reports
peft_cuda_tests:
name: PEFT CUDA Tests
runs-on: [single-gpu, nvidia-gpu, t4, ci]
container:
image: diffusers/diffusers-pytorch-cuda
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0
defaults:
run:
shell: bash
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
python -m pip install -U peft@git+https://github.com/huggingface/peft.git
- name: Environment
run: |
python utils/print_env.py
- name: Run slow PEFT CUDA tests
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8
run: |
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-s -v -k "not Flax and not Onnx and not PEFTLoRALoading" \
--make-reports=tests_peft_cuda \
tests/lora/
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-s -v -k "lora and not Flax and not Onnx and not PEFTLoRALoading" \
--make-reports=tests_peft_cuda_models_lora \
tests/models/
- name: Failure short reports
if: ${{ failure() }}
run: |
cat reports/tests_peft_cuda_stats.txt
cat reports/tests_peft_cuda_failures_short.txt
cat reports/tests_peft_cuda_models_lora_failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
with:
name: torch_peft_test_reports
path: reports
flax_tpu_tests:
name: Flax TPU Tests
runs-on: docker-tpu
@@ -253,7 +309,7 @@ jobs:
container:
image: diffusers/diffusers-pytorch-compile-cuda
options: --gpus 0 --shm-size "16gb" --ipc host
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Checkout diffusers
@@ -295,7 +351,7 @@ jobs:
container:
image: diffusers/diffusers-pytorch-xformers-cuda
options: --gpus 0 --shm-size "16gb" --ipc host
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Checkout diffusers
@@ -336,7 +392,7 @@ jobs:
container:
image: diffusers/diffusers-pytorch-cuda
options: --gpus 0 --shm-size "16gb" --ipc host
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Checkout diffusers
@@ -38,7 +38,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
datasets \
hf-doc-builder \
huggingface-hub \
hf_transfer \
Jinja2 \
librosa \
numpy==1.26.4 \
@@ -38,7 +38,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
datasets \
hf-doc-builder \
huggingface-hub \
hf_transfer \
Jinja2 \
librosa \
numpy==1.26.4 \
-1
View File
@@ -38,7 +38,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
datasets \
hf-doc-builder \
huggingface-hub \
hf_transfer \
Jinja2 \
librosa \
numpy==1.26.4 \
@@ -38,7 +38,6 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
datasets \
hf-doc-builder \
huggingface-hub \
hf_transfer \
Jinja2 \
librosa \
numpy==1.26.4 \
@@ -340,7 +340,6 @@ Now you can wrap all these components together in a training loop with 🤗 Acce
... loss = F.mse_loss(noise_pred, noise)
... accelerator.backward(loss)
... if (step + 1) % config.gradient_accumulation_steps == 0:
... accelerator.clip_grad_norm_(model.parameters(), 1.0)
... optimizer.step()
... lr_scheduler.step()
@@ -1302,7 +1302,7 @@ def main(args):
text_encoder_lora_layers=text_encoder_one_lora_layers_to_save,
)
if args.train_text_encoder_ti:
embedding_handler.save_embeddings(f"{args.output_dir}/{Path(args.output_dir).name}_emb.safetensors")
embedding_handler.save_embeddings(f"{output_dir}/{args.output_dir}_emb.safetensors")
def load_model_hook(models, input_dir):
unet_ = None
@@ -1605,15 +1605,13 @@ def main(args):
if isinstance(model, type(unwrap_model(unet))):
unet_lora_layers_to_save = convert_state_dict_to_diffusers(get_peft_model_state_dict(model))
elif isinstance(model, type(unwrap_model(text_encoder_one))):
if args.train_text_encoder:
text_encoder_one_lora_layers_to_save = convert_state_dict_to_diffusers(
get_peft_model_state_dict(model)
)
text_encoder_one_lora_layers_to_save = convert_state_dict_to_diffusers(
get_peft_model_state_dict(model)
)
elif isinstance(model, type(unwrap_model(text_encoder_two))):
if args.train_text_encoder:
text_encoder_two_lora_layers_to_save = convert_state_dict_to_diffusers(
get_peft_model_state_dict(model)
)
text_encoder_two_lora_layers_to_save = convert_state_dict_to_diffusers(
get_peft_model_state_dict(model)
)
else:
raise ValueError(f"unexpected save model: {model.__class__}")
@@ -1627,7 +1625,7 @@ def main(args):
text_encoder_2_lora_layers=text_encoder_two_lora_layers_to_save,
)
if args.train_text_encoder_ti:
embedding_handler.save_embeddings(f"{args.output_dir}/{Path(args.output_dir).name}_emb.safetensors")
embedding_handler.save_embeddings(f"{output_dir}/{args.output_dir}_emb.safetensors")
def load_model_hook(models, input_dir):
unet_ = None
+5 -1
View File
@@ -24,6 +24,7 @@ from ..utils import (
is_bitsandbytes_available,
is_flax_available,
is_google_colab,
is_notebook,
is_peft_available,
is_safetensors_available,
is_torch_available,
@@ -106,6 +107,8 @@ class EnvironmentCommand(BaseDiffusersCLICommand):
platform_info = platform.platform()
is_notebook_str = "Yes" if is_notebook() else "No"
is_google_colab_str = "Yes" if is_google_colab() else "No"
accelerator = "NA"
@@ -120,7 +123,7 @@ class EnvironmentCommand(BaseDiffusersCLICommand):
out_str = out_str.decode("utf-8")
if len(out_str) > 0:
accelerator = out_str.strip()
accelerator = out_str.strip() + " VRAM"
except FileNotFoundError:
pass
elif platform.system() == "Darwin": # Mac OS
@@ -152,6 +155,7 @@ class EnvironmentCommand(BaseDiffusersCLICommand):
info = {
"🤗 Diffusers version": version,
"Platform": platform_info,
"Running on a notebook?": is_notebook_str,
"Running on Google Colab?": is_google_colab_str,
"Python version": platform.python_version(),
"PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})",
+1 -118
View File
@@ -677,21 +677,6 @@ class Attention(nn.Module):
concatenated_bias = torch.cat([self.to_k.bias.data, self.to_v.bias.data])
self.to_kv.bias.copy_(concatenated_bias)
# handle added projections for SD3 and others.
if hasattr(self, "add_q_proj") and hasattr(self, "add_k_proj") and hasattr(self, "add_v_proj"):
concatenated_weights = torch.cat(
[self.add_q_proj.weight.data, self.add_k_proj.weight.data, self.add_v_proj.weight.data]
)
in_features = concatenated_weights.shape[1]
out_features = concatenated_weights.shape[0]
self.to_added_qkv = nn.Linear(in_features, out_features, bias=True, device=device, dtype=dtype)
self.to_added_qkv.weight.copy_(concatenated_weights)
concatenated_bias = torch.cat(
[self.add_q_proj.bias.data, self.add_k_proj.bias.data, self.add_v_proj.bias.data]
)
self.to_added_qkv.bias.copy_(concatenated_bias)
self.fused_projections = fuse
@@ -1182,6 +1167,7 @@ class AuraFlowAttnProcessor2_0:
attn: Attention,
hidden_states: torch.FloatTensor,
encoder_hidden_states: torch.FloatTensor = None,
i=0,
*args,
**kwargs,
) -> torch.FloatTensor:
@@ -1722,109 +1708,6 @@ class HunyuanAttnProcessor2_0:
return hidden_states
class FusedHunyuanAttnProcessor2_0:
r"""
Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0) with fused
projection layers. This is used in the HunyuanDiT model. It applies a s normalization layer and rotary embedding on
query and key vector.
"""
def __init__(self):
if not hasattr(F, "scaled_dot_product_attention"):
raise ImportError(
"FusedHunyuanAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0."
)
def __call__(
self,
attn: Attention,
hidden_states: torch.Tensor,
encoder_hidden_states: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.Tensor] = None,
temb: Optional[torch.Tensor] = None,
image_rotary_emb: Optional[torch.Tensor] = None,
) -> torch.Tensor:
from .embeddings import apply_rotary_emb
residual = hidden_states
if attn.spatial_norm is not None:
hidden_states = attn.spatial_norm(hidden_states, temb)
input_ndim = hidden_states.ndim
if input_ndim == 4:
batch_size, channel, height, width = hidden_states.shape
hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)
batch_size, sequence_length, _ = (
hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
)
if attention_mask is not None:
attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)
# scaled_dot_product_attention expects attention_mask shape to be
# (batch, heads, source_length, target_length)
attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1])
if attn.group_norm is not None:
hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
if encoder_hidden_states is None:
qkv = attn.to_qkv(hidden_states)
split_size = qkv.shape[-1] // 3
query, key, value = torch.split(qkv, split_size, dim=-1)
else:
if attn.norm_cross:
encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)
query = attn.to_q(hidden_states)
kv = attn.to_kv(encoder_hidden_states)
split_size = kv.shape[-1] // 2
key, value = torch.split(kv, split_size, dim=-1)
inner_dim = key.shape[-1]
head_dim = inner_dim // attn.heads
query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
if attn.norm_q is not None:
query = attn.norm_q(query)
if attn.norm_k is not None:
key = attn.norm_k(key)
# Apply RoPE if needed
if image_rotary_emb is not None:
query = apply_rotary_emb(query, image_rotary_emb)
if not attn.is_cross_attention:
key = apply_rotary_emb(key, image_rotary_emb)
# the output of sdp = (batch, num_heads, seq_len, head_dim)
# TODO: add support for attn.scale when we move to Torch 2.1
hidden_states = F.scaled_dot_product_attention(
query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
)
hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
hidden_states = hidden_states.to(query.dtype)
# linear proj
hidden_states = attn.to_out[0](hidden_states)
# dropout
hidden_states = attn.to_out[1](hidden_states)
if input_ndim == 4:
hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)
if attn.residual_connection:
hidden_states = hidden_states + residual
hidden_states = hidden_states / attn.rescale_output_factor
return hidden_states
class LuminaAttnProcessor2_0:
r"""
Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
@@ -26,7 +26,6 @@ from ..attention_processor import (
AttentionProcessor,
AttnAddedKVProcessor,
AttnProcessor,
FusedAttnProcessor2_0,
)
from ..modeling_outputs import AutoencoderKLOutput
from ..modeling_utils import ModelMixin
@@ -63,9 +62,6 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin):
If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
can be fine-tuned / trained to a lower range without loosing too much precision in which case
`force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
mid_block_add_attention (`bool`, *optional*, default to `True`):
If enabled, the mid_block of the Encoder and Decoder will have attention blocks. If set to false, the
mid_block will only have resnet blocks
"""
_supports_gradient_checkpointing = True
@@ -91,7 +87,6 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin):
force_upcast: float = True,
use_quant_conv: bool = True,
use_post_quant_conv: bool = True,
mid_block_add_attention: bool = True,
):
super().__init__()
@@ -105,7 +100,6 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin):
act_fn=act_fn,
norm_num_groups=norm_num_groups,
double_z=True,
mid_block_add_attention=mid_block_add_attention,
)
# pass init params to Decoder
@@ -117,7 +111,6 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin):
layers_per_block=layers_per_block,
norm_num_groups=norm_num_groups,
act_fn=act_fn,
mid_block_add_attention=mid_block_add_attention,
)
self.quant_conv = nn.Conv2d(2 * latent_channels, 2 * latent_channels, 1) if use_quant_conv else None
@@ -493,8 +486,6 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin):
if isinstance(module, Attention):
module.fuse_projections(fuse=True)
self.set_attn_processor(FusedAttnProcessor2_0())
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
def unfuse_qkv_projections(self):
"""Disables the fused QKV projection if enabled.
+2 -4
View File
@@ -22,7 +22,7 @@ import torch.nn as nn
from ..configuration_utils import ConfigMixin, register_to_config
from ..loaders import FromOriginalModelMixin, PeftAdapterMixin
from ..models.attention import JointTransformerBlock
from ..models.attention_processor import Attention, AttentionProcessor, FusedJointAttnProcessor2_0
from ..models.attention_processor import Attention, AttentionProcessor
from ..models.modeling_outputs import Transformer2DModelOutput
from ..models.modeling_utils import ModelMixin
from ..utils import USE_PEFT_BACKEND, is_torch_version, logging, scale_lora_layers, unscale_lora_layers
@@ -196,7 +196,7 @@ class SD3ControlNetModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
for name, module in self.named_children():
fn_recursive_attn_processor(name, module, processor)
# Copied from diffusers.models.transformers.transformer_sd3.SD3Transformer2DModel.fuse_qkv_projections
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections
def fuse_qkv_projections(self):
"""
Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value)
@@ -220,8 +220,6 @@ class SD3ControlNetModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginal
if isinstance(module, Attention):
module.fuse_projections(fuse=True)
self.set_attn_processor(FusedJointAttnProcessor2_0())
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
def unfuse_qkv_projections(self):
"""Disables the fused QKV projection if enabled.
-3
View File
@@ -29,7 +29,6 @@ from .attention_processor import (
AttentionProcessor,
AttnAddedKVProcessor,
AttnProcessor,
FusedAttnProcessor2_0,
)
from .controlnet import ControlNetConditioningEmbedding
from .embeddings import TimestepEmbedding, Timesteps
@@ -1002,8 +1001,6 @@ class UNetControlNetXSModel(ModelMixin, ConfigMixin):
if isinstance(module, Attention):
module.fuse_projections(fuse=True)
self.set_attn_processor(FusedAttnProcessor2_0())
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
def unfuse_qkv_projections(self):
"""Disables the fused QKV projection if enabled.
@@ -20,7 +20,7 @@ from ...configuration_utils import ConfigMixin, register_to_config
from ...utils import logging
from ...utils.torch_utils import maybe_allow_in_graph
from ..attention import FeedForward
from ..attention_processor import Attention, AttentionProcessor, FusedHunyuanAttnProcessor2_0, HunyuanAttnProcessor2_0
from ..attention_processor import Attention, AttentionProcessor, HunyuanAttnProcessor2_0
from ..embeddings import (
HunyuanCombinedTimestepTextSizeStyleEmbedding,
PatchEmbed,
@@ -317,7 +317,7 @@ class HunyuanDiT2DModel(ModelMixin, ConfigMixin):
self.norm_out = AdaLayerNormContinuous(self.inner_dim, self.inner_dim, elementwise_affine=False, eps=1e-6)
self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True)
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections with FusedAttnProcessor2_0->FusedHunyuanAttnProcessor2_0
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections
def fuse_qkv_projections(self):
"""
Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value)
@@ -341,8 +341,6 @@ class HunyuanDiT2DModel(ModelMixin, ConfigMixin):
if isinstance(module, Attention):
module.fuse_projections(fuse=True)
self.set_attn_processor(FusedHunyuanAttnProcessor2_0())
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
def unfuse_qkv_projections(self):
"""Disables the fused QKV projection if enabled.
@@ -23,7 +23,7 @@ import torch.nn as nn
from ...configuration_utils import ConfigMixin, register_to_config
from ...loaders import FromOriginalModelMixin, PeftAdapterMixin
from ...models.attention import JointTransformerBlock
from ...models.attention_processor import Attention, AttentionProcessor, FusedJointAttnProcessor2_0
from ...models.attention_processor import Attention, AttentionProcessor
from ...models.modeling_utils import ModelMixin
from ...models.normalization import AdaLayerNormContinuous
from ...utils import USE_PEFT_BACKEND, is_torch_version, logging, scale_lora_layers, unscale_lora_layers
@@ -211,7 +211,7 @@ class SD3Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOrigi
for name, module in self.named_children():
fn_recursive_attn_processor(name, module, processor)
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections with FusedAttnProcessor2_0->FusedJointAttnProcessor2_0
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections
def fuse_qkv_projections(self):
"""
Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value)
@@ -235,8 +235,6 @@ class SD3Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOrigi
if isinstance(module, Attention):
module.fuse_projections(fuse=True)
self.set_attn_processor(FusedJointAttnProcessor2_0())
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
def unfuse_qkv_projections(self):
"""Disables the fused QKV projection if enabled.
@@ -30,7 +30,6 @@ from ..attention_processor import (
AttentionProcessor,
AttnAddedKVProcessor,
AttnProcessor,
FusedAttnProcessor2_0,
)
from ..embeddings import (
GaussianFourierProjection,
@@ -891,8 +890,6 @@ class UNet2DConditionModel(
if isinstance(module, Attention):
module.fuse_projections(fuse=True)
self.set_attn_processor(FusedAttnProcessor2_0())
def unfuse_qkv_projections(self):
"""Disables the fused QKV projection if enabled.
@@ -31,7 +31,6 @@ from ..attention_processor import (
AttentionProcessor,
AttnAddedKVProcessor,
AttnProcessor,
FusedAttnProcessor2_0,
)
from ..embeddings import TimestepEmbedding, Timesteps
from ..modeling_utils import ModelMixin
@@ -533,8 +532,6 @@ class UNet3DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
if isinstance(module, Attention):
module.fuse_projections(fuse=True)
self.set_attn_processor(FusedAttnProcessor2_0())
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
def unfuse_qkv_projections(self):
"""Disables the fused QKV projection if enabled.
@@ -29,7 +29,6 @@ from ..attention_processor import (
AttentionProcessor,
AttnAddedKVProcessor,
AttnProcessor,
FusedAttnProcessor2_0,
)
from ..embeddings import TimestepEmbedding, Timesteps
from ..modeling_utils import ModelMixin
@@ -499,8 +498,6 @@ class I2VGenXLUNet(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin):
if isinstance(module, Attention):
module.fuse_projections(fuse=True)
self.set_attn_processor(FusedAttnProcessor2_0())
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
def unfuse_qkv_projections(self):
"""Disables the fused QKV projection if enabled.
@@ -29,7 +29,6 @@ from ..attention_processor import (
AttnAddedKVProcessor,
AttnProcessor,
AttnProcessor2_0,
FusedAttnProcessor2_0,
IPAdapterAttnProcessor,
IPAdapterAttnProcessor2_0,
)
@@ -930,8 +929,6 @@ class UNetMotionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin):
if isinstance(module, Attention):
module.fuse_projections(fuse=True)
self.set_attn_processor(FusedAttnProcessor2_0())
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
def unfuse_qkv_projections(self):
"""Disables the fused QKV projection if enabled.
@@ -286,7 +286,6 @@ class AudioLDM2Pipeline(DiffusionPipeline):
The sequence of generated hidden-states.
"""
max_new_tokens = max_new_tokens if max_new_tokens is not None else self.language_model.config.max_new_tokens
model_kwargs = self.language_model._get_initial_cache_position(inputs_embeds, model_kwargs)
for _ in range(max_new_tokens):
# prepare model inputs
model_inputs = prepare_inputs_for_generation(inputs_embeds, **model_kwargs)
@@ -260,6 +260,7 @@ class AuraFlowPipeline(DiffusionPipeline):
padding="max_length",
return_tensors="pt",
)
text_inputs = {k: v.to(device) for k, v in text_inputs.items()}
text_input_ids = text_inputs["input_ids"]
untruncated_ids = self.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
@@ -272,7 +273,6 @@ class AuraFlowPipeline(DiffusionPipeline):
f" {max_length} tokens: {removed_text}"
)
text_inputs = {k: v.to(device) for k, v in text_inputs.items()}
prompt_embeds = self.text_encoder(**text_inputs)[0]
prompt_attention_mask = text_inputs["attention_mask"].unsqueeze(-1).expand(prompt_embeds.shape)
prompt_embeds = prompt_embeds * prompt_attention_mask
+1
View File
@@ -73,6 +73,7 @@ from .import_utils import (
is_librosa_available,
is_matplotlib_available,
is_note_seq_available,
is_notebook,
is_onnx_available,
is_peft_available,
is_peft_version,
+16 -1
View File
@@ -321,7 +321,18 @@ try:
except importlib_metadata.PackageNotFoundError:
_bitsandbytes_available = False
_is_google_colab = "google.colab" in sys.modules or any(k.startswith("COLAB_") for k in os.environ)
# Taken from `huggingface_hub`.
_is_notebook = False
try:
shell_class = get_ipython().__class__ # type: ignore # noqa: F821
for parent_class in shell_class.__mro__: # e.g. "is subclass of"
if parent_class.__name__ == "ZMQInteractiveShell":
_is_notebook = True # Jupyter notebook, Google colab or qtconsole
break
except NameError:
pass # Probably standard Python interpreter
_is_google_colab = "google.colab" in sys.modules
def is_torch_available():
@@ -432,6 +443,10 @@ def is_bitsandbytes_available():
return _bitsandbytes_available
def is_notebook():
return _is_notebook
def is_google_colab():
return _is_google_colab
+4 -2
View File
@@ -124,9 +124,11 @@ class ModelUtilsTest(unittest.TestCase):
if p1.data.ne(p2.data).sum() > 0:
assert False, "Parameters not the same!"
@unittest.skip("Flaky behaviour on CI. Re-enable after migrating to new runners")
@unittest.skipIf(torch_device == "mps", reason="Test not supported for MPS.")
def test_one_request_upon_cached(self):
# TODO: For some reason this test fails on MPS where no HEAD call is made.
if torch_device == "mps":
return
use_safetensors = False
with tempfile.TemporaryDirectory() as tmpdirname:
@@ -1,67 +0,0 @@
# coding=utf-8
# Copyright 2024 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import torch
from diffusers.models.transformers import TransformerTemporalModel
from diffusers.utils.testing_utils import (
enable_full_determinism,
torch_device,
)
from ..test_modeling_common import ModelTesterMixin
enable_full_determinism()
class TemporalTransformerTests(ModelTesterMixin, unittest.TestCase):
model_class = TransformerTemporalModel
main_input_name = "hidden_states"
@property
def dummy_input(self):
batch_size = 2
num_channels = 4
height = width = 32
hidden_states = torch.randn((batch_size, num_channels, height, width)).to(torch_device)
timestep = torch.randint(0, 1000, size=(batch_size,)).to(torch_device)
return {
"hidden_states": hidden_states,
"timestep": timestep,
}
@property
def input_shape(self):
return (4, 32, 32)
@property
def output_shape(self):
return (4, 32, 32)
def prepare_init_args_and_inputs_for_common(self):
init_dict = {
"num_attention_heads": 8,
"attention_head_dim": 4,
"in_channels": 4,
"num_layers": 1,
"norm_num_groups": 1,
}
inputs_dict = self.dummy_input
return init_dict, inputs_dict
+23 -42
View File
@@ -73,15 +73,14 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
def get_dummy_components(self):
torch.manual_seed(0)
unet = AudioLDM2UNet2DConditionModel(
block_out_channels=(8, 16),
layers_per_block=1,
norm_num_groups=8,
block_out_channels=(32, 64),
layers_per_block=2,
sample_size=32,
in_channels=4,
out_channels=4,
down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
cross_attention_dim=(8, 16),
cross_attention_dim=([None, 16, 32], [None, 16, 32]),
)
scheduler = DDIMScheduler(
beta_start=0.00085,
@@ -92,10 +91,9 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
)
torch.manual_seed(0)
vae = AutoencoderKL(
block_out_channels=[8, 16],
block_out_channels=[32, 64],
in_channels=1,
out_channels=1,
norm_num_groups=8,
down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
latent_channels=4,
@@ -104,34 +102,32 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_branch_config = ClapTextConfig(
bos_token_id=0,
eos_token_id=2,
hidden_size=8,
hidden_size=16,
intermediate_size=37,
layer_norm_eps=1e-05,
num_attention_heads=1,
num_hidden_layers=1,
num_attention_heads=2,
num_hidden_layers=2,
pad_token_id=1,
vocab_size=1000,
projection_dim=8,
projection_dim=16,
)
audio_branch_config = ClapAudioConfig(
spec_size=8,
spec_size=64,
window_size=4,
num_mel_bins=8,
num_mel_bins=64,
intermediate_size=37,
layer_norm_eps=1e-05,
depths=[1, 1],
num_attention_heads=[1, 1],
num_hidden_layers=1,
depths=[2, 2],
num_attention_heads=[2, 2],
num_hidden_layers=2,
hidden_size=192,
projection_dim=8,
projection_dim=16,
patch_size=2,
patch_stride=2,
patch_embed_input_channels=4,
)
text_encoder_config = ClapConfig.from_text_audio_configs(
text_config=text_branch_config,
audio_config=audio_branch_config,
projection_dim=16,
text_config=text_branch_config, audio_config=audio_branch_config, projection_dim=16
)
text_encoder = ClapModel(text_encoder_config)
tokenizer = RobertaTokenizer.from_pretrained("hf-internal-testing/tiny-random-roberta", model_max_length=77)
@@ -145,8 +141,8 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
d_model=32,
d_ff=37,
d_kv=8,
num_heads=1,
num_layers=1,
num_heads=2,
num_layers=2,
)
text_encoder_2 = T5EncoderModel(text_encoder_2_config)
tokenizer_2 = T5Tokenizer.from_pretrained("hf-internal-testing/tiny-random-T5Model", model_max_length=77)
@@ -154,8 +150,8 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
torch.manual_seed(0)
language_model_config = GPT2Config(
n_embd=16,
n_head=1,
n_layer=1,
n_head=2,
n_layer=2,
vocab_size=1000,
n_ctx=99,
n_positions=99,
@@ -164,11 +160,7 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
language_model.config.max_new_tokens = 8
torch.manual_seed(0)
projection_model = AudioLDM2ProjectionModel(
text_encoder_dim=16,
text_encoder_1_dim=32,
langauge_model_dim=16,
)
projection_model = AudioLDM2ProjectionModel(text_encoder_dim=16, text_encoder_1_dim=32, langauge_model_dim=16)
vocoder_config = SpeechT5HifiGanConfig(
model_in_dim=8,
@@ -228,18 +220,7 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
audio_slice = audio[:10]
expected_slice = np.array(
[
2.602e-03,
1.729e-03,
1.863e-03,
-2.219e-03,
-2.656e-03,
-2.017e-03,
-2.648e-03,
-2.115e-03,
-2.502e-03,
-2.081e-03,
]
[0.0025, 0.0018, 0.0018, -0.0023, -0.0026, -0.0020, -0.0026, -0.0021, -0.0027, -0.0020]
)
assert np.abs(audio_slice - expected_slice).max() < 1e-4
@@ -380,7 +361,7 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
audio_slice = audio[:10]
expected_slice = np.array(
[0.0026, 0.0017, 0.0018, -0.0022, -0.0026, -0.002, -0.0026, -0.0021, -0.0025, -0.0021]
[0.0025, 0.0018, 0.0018, -0.0023, -0.0026, -0.0020, -0.0026, -0.0021, -0.0027, -0.0020]
)
assert np.abs(audio_slice - expected_slice).max() < 1e-4
@@ -407,7 +388,7 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
assert audios.shape == (batch_size, 256)
# test num_waveforms_per_prompt for single prompt
num_waveforms_per_prompt = 1
num_waveforms_per_prompt = 2
audios = audioldm_pipe(prompt, num_inference_steps=2, num_waveforms_per_prompt=num_waveforms_per_prompt).audios
assert audios.shape == (num_waveforms_per_prompt, 256)
@@ -37,12 +37,7 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
enable_full_determinism,
floats_tensor,
require_torch_gpu,
torch_device,
)
from diffusers.utils.testing_utils import enable_full_determinism, floats_tensor, require_torch_gpu, torch_device
from ..pipeline_params import (
IMAGE_TO_IMAGE_IMAGE_PARAMS,
@@ -233,6 +228,12 @@ class ControlNetPipelineSDXLFastTests(
def test_attention_slicing_forward_pass(self):
return self._test_attention_slicing_forward_pass(expected_max_diff=2e-3)
def test_dict_tuple_outputs_equivalent(self):
expected_slice = None
if torch_device == "cpu":
expected_slice = np.array([0.5490, 0.5053, 0.4676, 0.5816, 0.5364, 0.4830, 0.5937, 0.5719, 0.4318])
super().test_dict_tuple_outputs_equivalent(expected_slice=expected_slice)
@unittest.skipIf(
torch_device != "cuda" or not is_xformers_available(),
reason="XFormers attention is only available with CUDA and `xformers` installed",
@@ -340,8 +341,7 @@ class ControlNetPipelineSDXLFastTests(
output = sd_pipe(**inputs)
image_slice = output.images[0, -3:, -3:, -1]
expected_slice = np.array([0.5460, 0.4943, 0.4635, 0.5832, 0.5366, 0.4815, 0.6034, 0.5741, 0.4341])
expected_slice = np.array([0.549, 0.5053, 0.4676, 0.5816, 0.5364, 0.483, 0.5937, 0.5719, 0.4318])
# make sure that it's equal
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-4
@@ -195,7 +195,7 @@ class StableDiffusionXLControlNetPipelineFastTests(
expected_pipe_slice = None
if torch_device == "cpu":
expected_pipe_slice = np.array(
[0.7335, 0.5866, 0.5623, 0.6242, 0.5751, 0.5999, 0.4091, 0.4590, 0.5054]
[0.7331, 0.5907, 0.5667, 0.6029, 0.5679, 0.5968, 0.4033, 0.4761, 0.5090]
)
return super().test_ip_adapter_single(expected_pipe_slice=expected_pipe_slice)
@@ -348,8 +348,9 @@ class StableDiffusionXLControlNetPipelineFastTests(
output = sd_pipe(**inputs)
image_slice = output.images[0, -3:, -3:, -1]
expected_slice = np.array([0.7335, 0.5866, 0.5623, 0.6242, 0.5751, 0.5999, 0.4091, 0.4590, 0.5054])
expected_slice = np.array(
[0.7330834, 0.590667, 0.5667336, 0.6029023, 0.5679491, 0.5968194, 0.4032986, 0.47612396, 0.5089609]
)
# make sure that it's equal
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-4
@@ -370,7 +371,7 @@ class StableDiffusionXLControlNetPipelineFastTests(
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.7820, 0.6195, 0.6193, 0.7045, 0.6706, 0.5837, 0.4147, 0.5232, 0.4868])
expected_slice = np.array([0.7799, 0.614, 0.6162, 0.7082, 0.6662, 0.5833, 0.4148, 0.5182, 0.4866])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -964,8 +965,9 @@ class StableDiffusionSSD1BControlNetPipelineFastTests(StableDiffusionXLControlNe
output = sd_pipe(**inputs)
image_slice = output.images[0, -3:, -3:, -1]
expected_slice = np.array([0.7212, 0.5890, 0.5491, 0.6425, 0.5970, 0.6091, 0.4418, 0.4556, 0.5032])
expected_slice = np.array(
[0.6831671, 0.5702532, 0.5459845, 0.6299793, 0.58563006, 0.6033695, 0.4493941, 0.46132287, 0.5035841]
)
# make sure that it's equal
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-4
@@ -973,8 +975,7 @@ class StableDiffusionSSD1BControlNetPipelineFastTests(StableDiffusionXLControlNe
def test_ip_adapter_single(self):
expected_pipe_slice = None
if torch_device == "cpu":
expected_pipe_slice = np.array([0.7212, 0.5890, 0.5491, 0.6425, 0.5970, 0.6091, 0.4418, 0.4556, 0.5032])
expected_pipe_slice = np.array([0.6832, 0.5703, 0.5460, 0.6300, 0.5856, 0.6034, 0.4494, 0.4613, 0.5036])
return super().test_ip_adapter_single(from_ssd1b=True, expected_pipe_slice=expected_pipe_slice)
def test_controlnet_sdxl_lcm(self):
@@ -993,7 +994,7 @@ class StableDiffusionSSD1BControlNetPipelineFastTests(StableDiffusionXLControlNe
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.6787, 0.5117, 0.5558, 0.6963, 0.6571, 0.5928, 0.4121, 0.5468, 0.5057])
expected_slice = np.array([0.6850, 0.5135, 0.5545, 0.7033, 0.6617, 0.5971, 0.4165, 0.5480, 0.5070])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -178,8 +178,7 @@ class ControlNetPipelineSDXLImg2ImgFastTests(
def test_ip_adapter_single(self):
expected_pipe_slice = None
if torch_device == "cpu":
expected_pipe_slice = np.array([0.6276, 0.5271, 0.5205, 0.5393, 0.5774, 0.5872, 0.5456, 0.5415, 0.5354])
# TODO: update after slices.p
expected_pipe_slice = np.array([0.6265, 0.5441, 0.5384, 0.5446, 0.5810, 0.5908, 0.5414, 0.5428, 0.5353])
return super().test_ip_adapter_single(expected_pipe_slice=expected_pipe_slice)
def test_stable_diffusion_xl_controlnet_img2img(self):
@@ -180,10 +180,11 @@ class StableDiffusion3ControlNetPipelineFastTests(unittest.TestCase, PipelineTes
image = output.images
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 32, 32, 3)
expected_slice = np.array([0.5767, 0.7100, 0.5981, 0.5674, 0.5952, 0.4102, 0.5093, 0.5044, 0.6030])
expected_slice = np.array(
[0.5761719, 0.71777344, 0.59228516, 0.578125, 0.6020508, 0.39453125, 0.46728516, 0.51708984, 0.58984375]
)
assert (
np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -36,12 +36,7 @@ from diffusers.utils.testing_utils import (
)
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import (
PipelineTesterMixin,
check_qkv_fusion_matches_attn_procs_length,
check_qkv_fusion_processors_exist,
to_np,
)
from ..test_pipelines_common import PipelineTesterMixin, to_np
enable_full_determinism()
@@ -266,16 +261,6 @@ class HunyuanDiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
original_image_slice = image[0, -3:, -3:, -1]
pipe.transformer.fuse_qkv_projections()
# TODO (sayakpaul): will refactor this once `fuse_qkv_projections()` has been added
# to the pipeline level.
pipe.transformer.fuse_qkv_projections()
assert check_qkv_fusion_processors_exist(
pipe.transformer
), "Something wrong with the fused attention processors. Expected all the attention processors to be fused."
assert check_qkv_fusion_matches_attn_procs_length(
pipe.transformer, pipe.transformer.original_attn_processors
), "Something wrong with the attention processors concerning the fused QKV projections."
inputs = self.get_dummy_inputs(device)
inputs["return_dict"] = False
image_fused = pipe(**inputs)[0]
@@ -39,6 +39,7 @@ from diffusers.utils.testing_utils import (
enable_full_determinism,
floats_tensor,
numpy_cosine_similarity_distance,
print_tensor_test,
require_torch_gpu,
skip_mps,
slow,
@@ -264,5 +265,6 @@ class I2VGenXLPipelineSlowTests(unittest.TestCase):
assert image.shape == (num_frames, 704, 1280, 3)
image_slice = image[0, -3:, -3:, -1]
print_tensor_test(image_slice.flatten())
expected_slice = np.array([0.5482, 0.6244, 0.6274, 0.4584, 0.5935, 0.5937, 0.4579, 0.5767, 0.5892])
assert numpy_cosine_similarity_distance(image_slice.flatten(), expected_slice.flatten()) < 1e-3
@@ -94,7 +94,7 @@ class KandinskyPipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase)
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.2893, 0.1464, 0.4603, 0.3529, 0.4612, 0.7701, 0.4027, 0.3051, 0.5155])
expected_slice = np.array([0.0000, 0.0000, 0.6777, 0.1363, 0.3624, 0.7868, 0.3869, 0.3395, 0.5068])
assert (
np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -200,7 +200,7 @@ class KandinskyPipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.Te
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4852, 0.4136, 0.4539, 0.4781, 0.4680, 0.5217, 0.4973, 0.4089, 0.4977])
expected_slice = np.array([0.4260, 0.3596, 0.4571, 0.3890, 0.4087, 0.5137, 0.4819, 0.4116, 0.5053])
assert (
np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -305,14 +305,11 @@ class KandinskyPipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.Te
)[0]
image_slice = image[0, -3:, -3:, -1]
image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]
print(image_from_tuple_slice)
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.0320, 0.0860, 0.4013, 0.0518, 0.2484, 0.5847, 0.4411, 0.2321, 0.4593])
expected_slice = np.array([0.0477, 0.0808, 0.2972, 0.2705, 0.3620, 0.6247, 0.4464, 0.2870, 0.3530])
assert (
np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -211,13 +211,12 @@ class KandinskyPriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
)[0]
image_slice = image[0, -10:]
image_from_tuple_slice = image_from_tuple[0, -10:]
assert image.shape == (1, 32)
expected_slice = np.array(
[-0.5948, 0.1875, -0.1523, -1.1995, -1.4061, -0.6367, -1.4607, -0.6406, 0.8793, -0.3891]
[-0.0532, 1.7120, 0.3656, -1.0852, -0.8946, -1.1756, 0.4348, 0.2482, 0.5146, -0.1156]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -99,7 +99,7 @@ class KandinskyV22PipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCa
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.3076, 0.2729, 0.5668, 0.0522, 0.3384, 0.7028, 0.4908, 0.3659, 0.6243])
expected_slice = np.array([0.3013, 0.0471, 0.5176, 0.1817, 0.2566, 0.7076, 0.6712, 0.4421, 0.7503])
assert (
np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -221,7 +221,7 @@ class KandinskyV22PipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4445, 0.4287, 0.4596, 0.3919, 0.3730, 0.5039, 0.4834, 0.4269, 0.5521])
expected_slice = np.array([0.4353, 0.4710, 0.5128, 0.4806, 0.5054, 0.5348, 0.5224, 0.4603, 0.5025])
assert (
np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -213,13 +213,12 @@ class KandinskyV22PriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
)[0]
image_slice = image[0, -10:]
image_from_tuple_slice = image_from_tuple[0, -10:]
assert image.shape == (1, 32)
expected_slice = np.array(
[-0.5948, 0.1875, -0.1523, -1.1995, -1.4061, -0.6367, -1.4607, -0.6406, 0.8793, -0.3891]
[-0.0532, 1.7120, 0.3656, -1.0852, -0.8946, -1.1756, 0.4348, 0.2482, 0.5146, -0.1156]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -30,12 +30,7 @@ from transformers import (
)
from diffusers import KandinskyV22PriorEmb2EmbPipeline, PriorTransformer, UnCLIPScheduler
from diffusers.utils.testing_utils import (
enable_full_determinism,
floats_tensor,
skip_mps,
torch_device,
)
from diffusers.utils.testing_utils import enable_full_determinism, floats_tensor, skip_mps, torch_device
from ..test_pipelines_common import PipelineTesterMixin
@@ -215,13 +210,23 @@ class KandinskyV22PriorEmb2EmbPipelineFastTests(PipelineTesterMixin, unittest.Te
)[0]
image_slice = image[0, -10:]
image_from_tuple_slice = image_from_tuple[0, -10:]
assert image.shape == (1, 32)
expected_slice = np.array(
[-0.8947, 0.7225, -0.2400, -1.4224, -1.9268, -1.1454, -1.8220, -0.7972, 1.0465, -0.5207]
[
0.1071284,
1.3330271,
0.61260223,
-0.6691065,
-0.3846852,
-1.0303661,
0.22716111,
0.03348901,
0.30040675,
-0.24805029,
]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -28,7 +28,9 @@ from diffusers import (
StableDiffusionXLControlNetPipeline,
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import enable_full_determinism
from diffusers.utils.testing_utils import (
enable_full_determinism,
)
from diffusers.utils.torch_utils import randn_tensor
from ..pipeline_params import (
@@ -235,7 +237,9 @@ class StableDiffusionXLControlNetPAGPipelineFastTests(
64,
3,
), f"the shape of the output image should be (1, 64, 64, 3) but got {image.shape}"
expected_slice = np.array([0.7036, 0.5613, 0.5526, 0.6129, 0.5610, 0.5842, 0.4228, 0.4612, 0.5017])
expected_slice = np.array(
[0.6819614, 0.5551478, 0.5499094, 0.5769566, 0.53942275, 0.5707505, 0.41131154, 0.47833863, 0.49982738]
)
max_diff = np.abs(image_slice.flatten() - expected_slice).max()
assert max_diff < 1e-3, f"output is different from expected, {image_slice.flatten()}"
@@ -259,7 +263,9 @@ class StableDiffusionXLControlNetPAGPipelineFastTests(
64,
3,
), f"the shape of the output image should be (1, 64, 64, 3) but got {image.shape}"
expected_slice = np.array([0.6888, 0.5398, 0.5603, 0.6086, 0.5541, 0.5957, 0.4332, 0.4643, 0.5154])
expected_slice = np.array(
[0.66685176, 0.53207266, 0.5541569, 0.5912994, 0.5368312, 0.58433825, 0.42607725, 0.46805605, 0.5098659]
)
max_diff = np.abs(image_slice.flatten() - expected_slice).max()
assert max_diff < 1e-3, f"output is different from expected, {image_slice.flatten()}"
+3 -1
View File
@@ -283,7 +283,9 @@ class StableDiffusionXLPAGPipelineFastTests(
64,
3,
), f"the shape of the output image should be (1, 64, 64, 3) but got {image.shape}"
expected_slice = np.array([0.5382, 0.5439, 0.4704, 0.4569, 0.5234, 0.4834, 0.5289, 0.5039, 0.4764])
expected_slice = np.array(
[0.55341685, 0.55503535, 0.47299808, 0.43274558, 0.4965323, 0.46310428, 0.51455414, 0.5015592, 0.46913484]
)
max_diff = np.abs(image_slice.flatten() - expected_slice).max()
self.assertLessEqual(max_diff, 1e-3)
+3 -1
View File
@@ -260,7 +260,9 @@ class StableDiffusionXLPAGImg2ImgPipelineFastTests(
32,
3,
), f"the shape of the output image should be (1, 64, 64, 3) but got {image.shape}"
expected_slice = np.array([0.4613, 0.4902, 0.4406, 0.6788, 0.5611, 0.4529, 0.5893, 0.5975, 0.5226])
expected_slice = np.array(
[0.46703637, 0.4917526, 0.44394222, 0.6895079, 0.56251144, 0.45474228, 0.5957122, 0.6016377, 0.5276273]
)
max_diff = np.abs(image_slice.flatten() - expected_slice).max()
assert max_diff < 1e-3, f"output is different from expected, {image_slice.flatten()}"
+3 -1
View File
@@ -265,7 +265,9 @@ class StableDiffusionXLPAGInpaintPipelineFastTests(
64,
3,
), f"the shape of the output image should be (1, 64, 64, 3) but got {image.shape}"
expected_slice = np.array([0.8366, 0.5513, 0.6105, 0.6213, 0.6957, 0.7400, 0.6614, 0.6102, 0.5239])
expected_slice = np.array(
[0.8115454, 0.53986573, 0.5825281, 0.6028964, 0.67128646, 0.7046922, 0.6418713, 0.5933924, 0.5154763]
)
max_diff = np.abs(image_slice.flatten() - expected_slice).max()
assert max_diff < 1e-3, f"output is different from expected, {image_slice.flatten()}"
+1 -1
View File
@@ -181,7 +181,7 @@ class ShapEPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
assert image.shape == (32, 16)
expected_slice = np.array([-1.0000, -0.6559, 1.0000, -0.9096, -0.7252, 0.8211, -0.7647, -0.3308, 0.6462])
expected_slice = np.array([-1.0000, -0.6241, 1.0000, -0.8978, -0.6866, 0.7876, -0.7473, -0.2874, 0.6103])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_inference_batch_consistent(self):
@@ -168,12 +168,22 @@ class StableCascadePriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase
image_from_tuple = pipe(**self.get_dummy_inputs(device), return_dict=False)[0]
image_slice = image[0, 0, 0, -10:]
image_from_tuple_slice = image_from_tuple[0, 0, 0, -10:]
assert image.shape == (1, 16, 24, 24)
expected_slice = np.array(
[94.5498, -21.9481, -117.5025, -192.8760, 38.0117, 73.4709, 38.1142, -185.5593, -47.7869, 167.2853]
[
96.139565,
-20.213179,
-116.40341,
-191.57129,
39.350136,
74.80767,
39.782352,
-184.67352,
-46.426907,
168.41783,
]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-2
@@ -13,11 +13,7 @@ from diffusers.utils.testing_utils import (
torch_device,
)
from ..test_pipelines_common import (
PipelineTesterMixin,
check_qkv_fusion_matches_attn_procs_length,
check_qkv_fusion_processors_exist,
)
from ..test_pipelines_common import PipelineTesterMixin
class StableDiffusion3PipelineFastTests(unittest.TestCase, PipelineTesterMixin):
@@ -195,16 +191,7 @@ class StableDiffusion3PipelineFastTests(unittest.TestCase, PipelineTesterMixin):
image = pipe(**inputs).images
original_image_slice = image[0, -3:, -3:, -1]
# TODO (sayakpaul): will refactor this once `fuse_qkv_projections()` has been added
# to the pipeline level.
pipe.transformer.fuse_qkv_projections()
assert check_qkv_fusion_processors_exist(
pipe.transformer
), "Something wrong with the fused attention processors. Expected all the attention processors to be fused."
assert check_qkv_fusion_matches_attn_procs_length(
pipe.transformer, pipe.transformer.original_attn_processors
), "Something wrong with the attention processors concerning the fused QKV projections."
inputs = self.get_dummy_inputs(device)
image = pipe(**inputs).images
image_slice_fused = image[0, -3:, -3:, -1]
@@ -133,7 +133,7 @@ class StableDiffusionImageVariationPipelineFastTests(
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.5348, 0.5924, 0.4798, 0.5237, 0.5741, 0.4651, 0.5344, 0.4942, 0.4851])
expected_slice = np.array([0.5239, 0.5723, 0.4796, 0.5049, 0.5550, 0.4685, 0.5329, 0.4891, 0.4921])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
@@ -153,7 +153,7 @@ class StableDiffusionImageVariationPipelineFastTests(
image_slice = image[-1, -3:, -3:, -1]
assert image.shape == (2, 64, 64, 3)
expected_slice = np.array([0.6647, 0.5557, 0.5723, 0.5567, 0.5869, 0.6044, 0.5502, 0.5439, 0.5189])
expected_slice = np.array([0.6892, 0.5637, 0.5836, 0.5771, 0.6254, 0.6409, 0.5580, 0.5569, 0.5289])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
@@ -205,7 +205,7 @@ class StableDiffusionImageVariationPipelineSlowTests(unittest.TestCase):
image_slice = image[0, -3:, -3:, -1].flatten()
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.5348, 0.5924, 0.4798, 0.5237, 0.5741, 0.4651, 0.5344, 0.4942, 0.4851])
expected_slice = np.array([0.8449, 0.9079, 0.7571, 0.7873, 0.8348, 0.7010, 0.6694, 0.6873, 0.6138])
max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
assert max_diff < 1e-4
@@ -221,7 +221,7 @@ class StableDiffusionImageVariationPipelineSlowTests(unittest.TestCase):
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 64)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([0.5348, 0.5924, 0.4798, 0.5237, 0.5741, 0.4651, 0.5344, 0.4942, 0.4851])
expected_slice = np.array([-0.7974, -0.4343, -1.087, 0.04785, -1.327, 0.855, -2.148, -0.1725, 1.439])
max_diff = numpy_cosine_similarity_distance(latents_slice.flatten(), expected_slice)
assert max_diff < 1e-3
@@ -230,7 +230,7 @@ class StableDiffusionImageVariationPipelineSlowTests(unittest.TestCase):
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 64)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([0.5348, 0.5924, 0.4798, 0.5237, 0.5741, 0.4651, 0.5344, 0.4942, 0.4851])
expected_slice = np.array([0.3232, 0.004883, 0.913, -1.084, 0.6143, -1.6875, -2.463, -0.439, -0.419])
max_diff = numpy_cosine_similarity_distance(latents_slice.flatten(), expected_slice)
assert max_diff < 1e-3
@@ -174,7 +174,7 @@ class StableDiffusionXLPipelineFastTests(
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.5388, 0.5452, 0.4694, 0.4583, 0.5253, 0.4832, 0.5288, 0.5035, 0.47])
expected_slice = np.array([0.5552, 0.5569, 0.4725, 0.4348, 0.4994, 0.4632, 0.5142, 0.5012, 0.47])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -333,8 +333,7 @@ class StableDiffusionXLPipelineFastTests(
def test_ip_adapter_single(self):
expected_pipe_slice = None
if torch_device == "cpu":
expected_pipe_slice = np.array([0.5388, 0.5452, 0.4694, 0.4583, 0.5253, 0.4832, 0.5288, 0.5035, 0.4766])
expected_pipe_slice = np.array([0.5552, 0.5569, 0.4725, 0.4348, 0.4994, 0.4632, 0.5142, 0.5012, 0.4700])
return super().test_ip_adapter_single(expected_pipe_slice=expected_pipe_slice)
def test_attention_slicing_forward_pass(self):
@@ -295,9 +295,8 @@ class StableDiffusionXLAdapterPipelineFastTests(
expected_pipe_slice = None
if torch_device == "cpu":
expected_pipe_slice = np.array(
[0.5752, 0.6155, 0.4826, 0.5111, 0.5741, 0.4678, 0.5199, 0.5231, 0.4794]
[0.5753, 0.6022, 0.4728, 0.4986, 0.5708, 0.4645, 0.5194, 0.5134, 0.4730]
)
return super().test_ip_adapter_single(expected_pipe_slice=expected_pipe_slice)
def test_stable_diffusion_adapter_default_case(self):
@@ -312,7 +311,9 @@ class StableDiffusionXLAdapterPipelineFastTests(
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([00.5752, 0.6155, 0.4826, 0.5111, 0.5741, 0.4678, 0.5199, 0.5231, 0.4794])
expected_slice = np.array(
[0.5752919, 0.6022097, 0.4728038, 0.49861962, 0.57084894, 0.4644975, 0.5193715, 0.5133664, 0.4729858]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3
@parameterized.expand(
@@ -445,14 +446,15 @@ class StableDiffusionXLMultiAdapterPipelineFastTests(
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.5617, 0.6081, 0.4807, 0.5071, 0.5665, 0.4614, 0.5165, 0.5164, 0.4786])
expected_slice = np.array(
[0.5813032, 0.60995954, 0.47563356, 0.5056669, 0.57199144, 0.4631841, 0.5176794, 0.51252556, 0.47183886]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3
def test_ip_adapter_single(self):
expected_pipe_slice = None
if torch_device == "cpu":
expected_pipe_slice = np.array([0.5617, 0.6081, 0.4807, 0.5071, 0.5665, 0.4614, 0.5165, 0.5164, 0.4786])
expected_pipe_slice = np.array([0.5813, 0.6100, 0.4756, 0.5057, 0.5720, 0.4632, 0.5177, 0.5125, 0.4718])
return super().test_ip_adapter_single(from_multi=True, expected_pipe_slice=expected_pipe_slice)
def test_inference_batch_consistent(
@@ -313,8 +313,7 @@ class StableDiffusionXLImg2ImgPipelineFastTests(
def test_ip_adapter_single(self):
expected_pipe_slice = None
if torch_device == "cpu":
expected_pipe_slice = np.array([0.5133, 0.4626, 0.4970, 0.6273, 0.5160, 0.6891, 0.6639, 0.5892, 0.5709])
expected_pipe_slice = np.array([0.5174, 0.4512, 0.5006, 0.6273, 0.5160, 0.6825, 0.6655, 0.5840, 0.5675])
return super().test_ip_adapter_single(expected_pipe_slice=expected_pipe_slice)
def test_stable_diffusion_xl_img2img_tiny_autoencoder(self):
@@ -226,8 +226,7 @@ class StableDiffusionXLInpaintPipelineFastTests(
def test_ip_adapter_single(self):
expected_pipe_slice = None
if torch_device == "cpu":
expected_pipe_slice = np.array([0.8274, 0.5538, 0.6141, 0.5843, 0.6865, 0.7082, 0.5861, 0.6123, 0.5344])
expected_pipe_slice = np.array([0.7971, 0.5371, 0.5973, 0.5642, 0.6689, 0.6894, 0.5770, 0.6063, 0.5261])
return super().test_ip_adapter_single(expected_pipe_slice=expected_pipe_slice)
def test_components_function(self):
@@ -251,7 +250,7 @@ class StableDiffusionXLInpaintPipelineFastTests(
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.8279, 0.5673, 0.6088, 0.6156, 0.6923, 0.7347, 0.6547, 0.6108, 0.5198])
expected_slice = np.array([0.8029, 0.5523, 0.5825, 0.6003, 0.6702, 0.7018, 0.6369, 0.5955, 0.5123])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -386,7 +385,7 @@ class StableDiffusionXLInpaintPipelineFastTests(
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.7540, 0.5231, 0.5833, 0.6217, 0.6339, 0.7067, 0.6507, 0.5672, 0.5030])
expected_slice = np.array([0.7045, 0.4838, 0.5454, 0.6270, 0.6168, 0.6717, 0.6484, 0.5681, 0.4922])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -182,7 +182,7 @@ class StableUnCLIPImg2ImgPipelineFastTests(
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 32, 32, 3)
expected_slice = np.array([0.4397, 0.7080, 0.5590, 0.4255, 0.7181, 0.5938, 0.4051, 0.3720, 0.5116])
expected_slice = np.array([0.3872, 0.7224, 0.5601, 0.4741, 0.6872, 0.5814, 0.4636, 0.3867, 0.5078])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
-2
View File
@@ -146,7 +146,6 @@ class CustomPipeline(DiffusionPipeline):
class DownloadTests(unittest.TestCase):
@unittest.skip("Flaky behaviour on CI. Re-enable after migrating to new runners")
def test_one_request_upon_cached(self):
# TODO: For some reason this test fails on MPS where no HEAD call is made.
if torch_device == "mps":
@@ -192,7 +191,6 @@ class DownloadTests(unittest.TestCase):
assert "scheduler" in os.listdir(cached_folder)
assert "feature_extractor" in os.listdir(cached_folder)
@unittest.skip("Flaky behaviour on CI. Re-enable after migrating to new runners")
def test_less_downloads_passed_object_calls(self):
# TODO: For some reason this test fails on MPS where no HEAD call is made.
if torch_device == "mps":
+1 -31
View File
@@ -13,7 +13,6 @@ from typing import Any, Callable, Dict, Union
import numpy as np
import PIL.Image
import torch
import torch.nn as nn
from huggingface_hub import ModelCard, delete_repo
from huggingface_hub.utils import is_jinja_available
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
@@ -41,12 +40,7 @@ from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils import logging
from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version, is_xformers_available
from diffusers.utils.testing_utils import (
CaptureLogger,
require_torch,
skip_mps,
torch_device,
)
from diffusers.utils.testing_utils import CaptureLogger, require_torch, skip_mps, torch_device
from ..models.autoencoders.test_models_vae import (
get_asym_autoencoder_kl_config,
@@ -73,17 +67,6 @@ def check_same_shape(tensor_list):
return all(shape == shapes[0] for shape in shapes[1:])
def check_qkv_fusion_matches_attn_procs_length(model, original_attn_processors):
current_attn_processors = model.attn_processors
return len(current_attn_processors) == len(original_attn_processors)
def check_qkv_fusion_processors_exist(model):
current_attn_processors = model.attn_processors
proc_names = [v.__class__.__name__ for _, v in current_attn_processors.items()]
return all(p.startswith("Fused") for p in proc_names)
class SDFunctionTesterMixin:
"""
This mixin is designed to be used with PipelineTesterMixin and unittest.TestCase classes.
@@ -213,19 +196,6 @@ class SDFunctionTesterMixin:
original_image_slice = image[0, -3:, -3:, -1]
pipe.fuse_qkv_projections()
for _, component in pipe.components.items():
if (
isinstance(component, nn.Module)
and hasattr(component, "original_attn_processors")
and component.original_attn_processors is not None
):
assert check_qkv_fusion_processors_exist(
component
), "Something wrong with the fused attention processors. Expected all the attention processors to be fused."
assert check_qkv_fusion_matches_attn_procs_length(
component, component.original_attn_processors
), "Something wrong with the attention processors concerning the fused QKV projections."
inputs = self.get_dummy_inputs(device)
inputs["return_dict"] = False
image_fused = pipe(**inputs)[0]
@@ -168,12 +168,8 @@ class TextToVideoZeroSDXLPipelineFastTests(PipelineTesterMixin, PipelineFromPipe
first_frame_slice = result[0, -3:, -3:, -1]
last_frame_slice = result[-1, -3:, -3:, 0]
expected_slice1 = np.array(
[0.6008109, 0.73051643, 0.51778656, 0.55817354, 0.45222935, 0.45998418, 0.57017255, 0.54874814, 0.47078788]
)
expected_slice2 = np.array(
[0.6011751, 0.47420046, 0.41660714, 0.6472957, 0.41261768, 0.5438129, 0.7401535, 0.6756011, 0.53652245]
)
expected_slice1 = np.array([0.48, 0.58, 0.53, 0.59, 0.50, 0.44, 0.60, 0.65, 0.52])
expected_slice2 = np.array([0.66, 0.49, 0.40, 0.70, 0.47, 0.51, 0.73, 0.65, 0.52])
assert np.abs(first_frame_slice.flatten() - expected_slice1).max() < 1e-2
assert np.abs(last_frame_slice.flatten() - expected_slice2).max() < 1e-2
+1 -1
View File
@@ -76,7 +76,7 @@ def main(correct, fail=None):
done_tests = defaultdict(int)
for line in correct_lines:
file, class_name, test_name, correct_line = line.split("::")
file, class_name, test_name, correct_line = line.split(";")
if test_failures is None or "::".join([file, class_name, test_name]) in test_failures:
overwrite_file(file, class_name, test_name, correct_line, done_tests)