Compare commits

..

1 Commits

Author SHA1 Message Date
Wauplin fd1da2c797 Use HF_TOKEN env var in CI 2024-05-21 09:13:14 +02:00
5 changed files with 199 additions and 59 deletions
+4 -5
View File
@@ -25,17 +25,17 @@ jobs:
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Check out code
uses: actions/checkout@v3
- name: Find Changed Dockerfiles
id: file_changes
uses: jitterbit/get-changed-files@v1
with:
format: 'space-delimited'
token: ${{ secrets.GITHUB_TOKEN }}
- name: Build Changed Docker Images
run: |
CHANGED_FILES="${{ steps.file_changes.outputs.all }}"
@@ -52,7 +52,7 @@ jobs:
build-and-push-docker-images:
runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
if: github.event_name != 'pull_request'
permissions:
contents: read
packages: write
@@ -69,7 +69,6 @@ jobs:
- diffusers-flax-tpu
- diffusers-onnxruntime-cpu
- diffusers-onnxruntime-cuda
- diffusers-doc-builder
steps:
- name: Checkout repository
-50
View File
@@ -1,50 +0,0 @@
# Ubuntu 20.04 based image with Python 3.10 in a virtual environment and the Python packages
# listed below pre-installed. Starts an interactive bash shell by default.
FROM ubuntu:20.04
LABEL maintainer="Hugging Face"
LABEL repository="diffusers"

# Keep apt from prompting for input during the build.
ENV DEBIAN_FRONTEND=noninteractive

# Everything apt-related lives in ONE layer:
#   * the package index is always refreshed in the same layer that installs packages, so a
#     cached layer can never feed a stale index to the install step;
#   * the final cleanup of /var/lib/apt/lists actually shrinks the image (cleaning up in a later
#     layer leaves the lists stored in the earlier one).
# `apt-get` is used throughout because the `apt` front end does not offer a stable CLI for scripts.
# The deadsnakes PPA is added before the install — presumably the source of the python3.10
# packages on this base image.
RUN apt-get -y update \
    && apt-get install -y software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get -y update \
    && apt-get install -y bash \
        build-essential \
        git \
        git-lfs \
        curl \
        ca-certificates \
        libsndfile1-dev \
        python3.10 \
        python3-pip \
        libgl1 \
        python3.10-venv \
    && rm -rf /var/lib/apt/lists/*

# make sure to use venv
RUN python3.10 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
# The torch packages are resolved with the PyTorch CPU wheel index as an extra index.
RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
    python3.10 -m uv pip install --no-cache-dir \
        torch \
        torchvision \
        torchaudio \
        invisible_watermark \
        --extra-index-url https://download.pytorch.org/whl/cpu && \
    python3.10 -m uv pip install --no-cache-dir \
        accelerate \
        datasets \
        hf-doc-builder \
        huggingface-hub \
        Jinja2 \
        librosa \
        numpy \
        scipy \
        tensorboard \
        transformers \
        matplotlib \
        setuptools==69.5.1

CMD ["/bin/bash"]
+3 -3
View File
@@ -826,8 +826,8 @@ def convert_ldm_unet_checkpoint(checkpoint, config, extract_ema=False, **kwargs)
# at least a 100 parameters have to start with `model_ema` in order for the checkpoint to be EMA
if sum(k.startswith("model_ema") for k in keys) > 100 and extract_ema:
logger.warning("Checkpoint has both EMA and non-EMA weights.")
logger.warning(
logger.warninging("Checkpoint has both EMA and non-EMA weights.")
logger.warninging(
"In this conversion only the EMA weights are extracted. If you want to instead extract the non-EMA"
" weights (useful to continue fine-tuning), please make sure to remove the `--extract_ema` flag."
)
@@ -837,7 +837,7 @@ def convert_ldm_unet_checkpoint(checkpoint, config, extract_ema=False, **kwargs)
unet_state_dict[key.replace(unet_key, "")] = checkpoint.get(flat_ema_key)
else:
if sum(k.startswith("model_ema") for k in keys) > 100:
logger.warning(
logger.warninging(
"In this conversion only the non-EMA weights are extracted. If you want to instead extract the EMA"
" weights (usually better for inference), please make sure to add the `--extract_ema` flag."
)
@@ -178,7 +178,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
feature_extractor=feature_extractor,
)
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
self.video_processor = VideoProcessor(do_resize=True, vae_scale_factor=self.vae_scale_factor)
self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)
def _encode_image(
self,
@@ -0,0 +1,191 @@
# coding=utf-8
# Copyright 2024 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gc
import unittest
import torch
from diffusers import StableCascadeUNet
from diffusers.utils import logging
from diffusers.utils.testing_utils import (
enable_full_determinism,
numpy_cosine_similarity_distance,
require_torch_gpu,
slow,
)
from diffusers.utils.torch_utils import randn_tensor
# Module-level logger created through the diffusers logging utilities.
logger = logging.get_logger(__name__)
# Runs once at import time, before any test in this module executes.
# NOTE(review): presumably switches torch to deterministic behaviour — confirm in testing_utils.
enable_full_determinism()
@slow
class StableCascadeUNetModelSlowTests(unittest.TestCase):
    """Slow tests for loading ``StableCascadeUNet`` from a single checkpoint file.

    Each test loads a UNet with ``from_single_file`` and compares either its config or its
    forward-pass output against a reference obtained with ``from_pretrained`` / ``load_config``.
    Models are deleted and memory is released between loads so that only one UNet is alive at a time.
    """

    # Config keys that are excluded from every config comparison in this class.
    _PARAMS_TO_IGNORE = ("torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version")

    def tearDown(self) -> None:
        """Release Python and CUDA memory after every test."""
        super().tearDown()
        self._release_memory()

    @staticmethod
    def _release_memory() -> None:
        """Run the garbage collector, then ask torch to empty its CUDA cache."""
        gc.collect()
        torch.cuda.empty_cache()

    def _assert_config_values_match(self, expected_config, actual_config) -> None:
        """Assert every non-ignored entry of ``expected_config`` has the same value in ``actual_config``.

        Args:
            expected_config: Mapping whose items are iterated and used as the reference values.
            actual_config: Mapping that is indexed with each key of ``expected_config``.

        Raises:
            AssertionError: If a value differs; the message names the offending key.
            KeyError: If ``actual_config`` lacks a key present in ``expected_config``.
        """
        for param_name, param_value in expected_config.items():
            if param_name in self._PARAMS_TO_IGNORE:
                continue
            assert actual_config[param_name] == param_value, f"Config value mismatch for `{param_name}`"

    @staticmethod
    def _forward_on_cuda(unet, model_inputs):
        """Move ``unet`` to CUDA, run one gradient-free forward pass and return ``.sample`` as a float numpy array."""
        unet.to("cuda")
        with torch.no_grad():
            return unet(**model_inputs).sample.float().cpu().numpy()

    def test_stable_cascade_unet_prior_single_file_components(self):
        """The prior loaded from a single file has the same config values as the pretrained prior."""
        single_file_url = "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_c_bf16.safetensors"
        single_file_unet = StableCascadeUNet.from_single_file(single_file_url)
        single_file_unet_config = single_file_unet.config

        # Only the config is needed; drop the weights before loading the next model.
        del single_file_unet
        self._release_memory()

        unet = StableCascadeUNet.from_pretrained("stabilityai/stable-cascade-prior", subfolder="prior", variant="bf16")
        unet_config = unet.config

        del unet
        self._release_memory()

        self._assert_config_values_match(single_file_unet_config, unet_config)

    def test_stable_cascade_unet_decoder_single_file_components(self):
        """The decoder loaded from a single file has the same config values as the pretrained decoder."""
        single_file_url = "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_b_bf16.safetensors"
        single_file_unet = StableCascadeUNet.from_single_file(single_file_url)
        single_file_unet_config = single_file_unet.config

        # Only the config is needed; drop the weights before loading the next model.
        del single_file_unet
        self._release_memory()

        unet = StableCascadeUNet.from_pretrained("stabilityai/stable-cascade", subfolder="decoder", variant="bf16")
        unet_config = unet.config

        del unet
        self._release_memory()

        self._assert_config_values_match(single_file_unet_config, unet_config)

    def test_stable_cascade_unet_config_loading(self):
        """A config passed explicitly to ``from_single_file`` ends up in the loaded model's config."""
        config = StableCascadeUNet.load_config(
            pretrained_model_name_or_path="diffusers/stable-cascade-configs", subfolder="prior"
        )
        single_file_url = "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_c_bf16.safetensors"
        single_file_unet = StableCascadeUNet.from_single_file(single_file_url, config=config)
        single_file_unet_config = single_file_unet.config

        del single_file_unet
        self._release_memory()

        # Here the explicitly supplied config is the reference and the model config is checked against it.
        self._assert_config_values_match(config, single_file_unet_config)

    @require_torch_gpu
    def test_stable_cascade_unet_single_file_prior_forward_pass(self):
        """Pretrained and single-file priors produce nearly identical outputs in bfloat16."""
        dtype = torch.bfloat16
        generator = torch.Generator("cpu")

        # The generator is re-seeded with 0 for every tensor.
        model_inputs = {
            "sample": randn_tensor((1, 16, 24, 24), generator=generator.manual_seed(0)).to("cuda", dtype),
            "timestep_ratio": torch.tensor([1]).to("cuda", dtype),
            "clip_text_pooled": randn_tensor((1, 1, 1280), generator=generator.manual_seed(0)).to("cuda", dtype),
            "clip_text": randn_tensor((1, 77, 1280), generator=generator.manual_seed(0)).to("cuda", dtype),
            "clip_img": randn_tensor((1, 1, 768), generator=generator.manual_seed(0)).to("cuda", dtype),
            "pixels": randn_tensor((1, 3, 8, 8), generator=generator.manual_seed(0)).to("cuda", dtype),
        }

        unet = StableCascadeUNet.from_pretrained(
            "stabilityai/stable-cascade-prior",
            subfolder="prior",
            revision="refs/pr/2",
            variant="bf16",
            torch_dtype=dtype,
        )
        pretrained_output = self._forward_on_cuda(unet, model_inputs)

        # Remove the pretrained UNet from GPU memory before loading the single file UNet model
        del unet
        self._release_memory()

        single_file_url = "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_c_bf16.safetensors"
        single_file_unet = StableCascadeUNet.from_single_file(single_file_url, torch_dtype=dtype)
        single_file_output = self._forward_on_cuda(single_file_unet, model_inputs)

        # Remove the single file UNet from GPU memory now that its output has been copied to the CPU
        del single_file_unet
        self._release_memory()

        cosine_distance = numpy_cosine_similarity_distance(pretrained_output.flatten(), single_file_output.flatten())
        assert cosine_distance < 8e-3

    @require_torch_gpu
    def test_stable_cascade_unet_single_file_decoder_forward_pass(self):
        """Pretrained and single-file decoders produce nearly identical outputs in float32."""
        dtype = torch.float32
        generator = torch.Generator("cpu")

        # The generator is re-seeded with 0 for every tensor.
        model_inputs = {
            "sample": randn_tensor((1, 4, 256, 256), generator=generator.manual_seed(0)).to("cuda", dtype),
            "timestep_ratio": torch.tensor([1]).to("cuda", dtype),
            "clip_text": randn_tensor((1, 77, 1280), generator=generator.manual_seed(0)).to("cuda", dtype),
            "clip_text_pooled": randn_tensor((1, 1, 1280), generator=generator.manual_seed(0)).to("cuda", dtype),
            "pixels": randn_tensor((1, 3, 8, 8), generator=generator.manual_seed(0)).to("cuda", dtype),
        }

        unet = StableCascadeUNet.from_pretrained(
            "stabilityai/stable-cascade",
            subfolder="decoder",
            revision="refs/pr/44",
            torch_dtype=dtype,
        )
        pretrained_output = self._forward_on_cuda(unet, model_inputs)

        # Remove the pretrained UNet from GPU memory before loading the single file UNet model
        del unet
        self._release_memory()

        single_file_url = "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_b.safetensors"
        single_file_unet = StableCascadeUNet.from_single_file(single_file_url, torch_dtype=dtype)
        single_file_output = self._forward_on_cuda(single_file_unet, model_inputs)

        # Remove the single file UNet from GPU memory now that its output has been copied to the CPU
        del single_file_unet
        self._release_memory()

        cosine_distance = numpy_cosine_similarity_distance(pretrained_output.flatten(), single_file_output.flatten())
        assert cosine_distance < 1e-4