Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | a75bc61b71 | | |
| | 8de27500da | | |
| | 5575303be0 | | |
| | 235ff8808f | | |
@@ -19,7 +19,7 @@ env:
|
||||
jobs:
|
||||
setup_torch_cuda_pipeline_matrix:
|
||||
name: Setup Torch Pipelines Matrix
|
||||
runs-on: diffusers/diffusers-pytorch-cpu
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
|
||||
steps:
|
||||
@@ -67,19 +67,19 @@ jobs:
|
||||
fetch-depth: 2
|
||||
- name: NVIDIA-SMI
|
||||
run: nvidia-smi
|
||||
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
|
||||
python -m uv pip install -e [quality,test]
|
||||
python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
|
||||
python -m uv pip install pytest-reportlog
|
||||
|
||||
|
||||
- name: Environment
|
||||
run: |
|
||||
python utils/print_env.py
|
||||
|
||||
- name: Nightly PyTorch CUDA checkpoint (pipelines) tests
|
||||
|
||||
- name: Nightly PyTorch CUDA checkpoint (pipelines) tests
|
||||
env:
|
||||
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
|
||||
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
|
||||
@@ -88,9 +88,9 @@ jobs:
|
||||
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
|
||||
-s -v -k "not Flax and not Onnx" \
|
||||
--make-reports=tests_pipeline_${{ matrix.module }}_cuda \
|
||||
--report-log=tests_pipeline_${{ matrix.module }}_cuda.log \
|
||||
--report-log=tests_pipeline_${{ matrix.module }}_cuda.log \
|
||||
tests/pipelines/${{ matrix.module }}
|
||||
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
run: |
|
||||
@@ -103,7 +103,7 @@ jobs:
|
||||
with:
|
||||
name: pipeline_${{ matrix.module }}_test_reports
|
||||
path: reports
|
||||
|
||||
|
||||
- name: Generate Report and Notify Channel
|
||||
if: always()
|
||||
run: |
|
||||
@@ -139,7 +139,7 @@ jobs:
|
||||
run: python utils/print_env.py
|
||||
|
||||
- name: Run nightly PyTorch CUDA tests for non-pipeline modules
|
||||
if: ${{ matrix.module != 'examples'}}
|
||||
if: ${{ matrix.module != 'examples'}}
|
||||
env:
|
||||
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
|
||||
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
|
||||
@@ -148,7 +148,7 @@ jobs:
|
||||
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
|
||||
-s -v -k "not Flax and not Onnx" \
|
||||
--make-reports=tests_torch_${{ matrix.module }}_cuda \
|
||||
--report-log=tests_torch_${{ matrix.module }}_cuda.log \
|
||||
--report-log=tests_torch_${{ matrix.module }}_cuda.log \
|
||||
tests/${{ matrix.module }}
|
||||
|
||||
- name: Run nightly example tests with Torch
|
||||
@@ -161,13 +161,13 @@ jobs:
|
||||
python -m uv pip install peft@git+https://github.com/huggingface/peft.git
|
||||
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
|
||||
-s -v --make-reports=examples_torch_cuda \
|
||||
--report-log=examples_torch_cuda.log \
|
||||
--report-log=examples_torch_cuda.log \
|
||||
examples/
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
run: |
|
||||
cat reports/tests_torch_${{ matrix.module }}_cuda_stats.txt
|
||||
cat reports/tests_torch_${{ matrix.module }}_cuda_stats.txt
|
||||
cat reports/tests_torch_${{ matrix.module }}_cuda_failures_short.txt
|
||||
|
||||
- name: Test suite reports artifacts
|
||||
@@ -218,13 +218,13 @@ jobs:
|
||||
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
|
||||
-s -v -k "not Flax and not Onnx" \
|
||||
--make-reports=tests_torch_lora_cuda \
|
||||
--report-log=tests_torch_lora_cuda.log \
|
||||
--report-log=tests_torch_lora_cuda.log \
|
||||
tests/lora
|
||||
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
run: |
|
||||
cat reports/tests_torch_lora_cuda_stats.txt
|
||||
cat reports/tests_torch_lora_cuda_stats.txt
|
||||
cat reports/tests_torch_lora_cuda_failures_short.txt
|
||||
|
||||
- name: Test suite reports artifacts
|
||||
@@ -239,12 +239,12 @@ jobs:
|
||||
run: |
|
||||
pip install slack_sdk tabulate
|
||||
python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
|
||||
run_flax_tpu_tests:
|
||||
name: Nightly Flax TPU Tests
|
||||
runs-on: docker-tpu
|
||||
if: github.event_name == 'schedule'
|
||||
|
||||
|
||||
container:
|
||||
image: diffusers/diffusers-flax-tpu
|
||||
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --privileged
|
||||
@@ -274,7 +274,7 @@ jobs:
|
||||
python -m pytest -n 0 \
|
||||
-s -v -k "Flax" \
|
||||
--make-reports=tests_flax_tpu \
|
||||
--report-log=tests_flax_tpu.log \
|
||||
--report-log=tests_flax_tpu.log \
|
||||
tests/
|
||||
|
||||
- name: Failure short reports
|
||||
@@ -302,7 +302,7 @@ jobs:
|
||||
container:
|
||||
image: diffusers/diffusers-onnxruntime-cuda
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
|
||||
|
||||
|
||||
steps:
|
||||
- name: Checkout diffusers
|
||||
uses: actions/checkout@v3
|
||||
@@ -321,7 +321,7 @@ jobs:
|
||||
|
||||
- name: Environment
|
||||
run: python utils/print_env.py
|
||||
|
||||
|
||||
- name: Run nightly ONNXRuntime CUDA tests
|
||||
env:
|
||||
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
|
||||
@@ -329,7 +329,7 @@ jobs:
|
||||
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
|
||||
-s -v -k "Onnx" \
|
||||
--make-reports=tests_onnx_cuda \
|
||||
--report-log=tests_onnx_cuda.log \
|
||||
--report-log=tests_onnx_cuda.log \
|
||||
tests/
|
||||
|
||||
- name: Failure short reports
|
||||
@@ -344,7 +344,7 @@ jobs:
|
||||
with:
|
||||
name: ${{ matrix.config.report }}_test_reports
|
||||
path: reports
|
||||
|
||||
|
||||
- name: Generate Report and Notify Channel
|
||||
if: always()
|
||||
run: |
|
||||
|
||||
@@ -21,7 +21,7 @@ env:
|
||||
jobs:
|
||||
setup_torch_cuda_pipeline_matrix:
|
||||
name: Setup Torch Pipelines CUDA Slow Tests Matrix
|
||||
runs-on: diffusers/diffusers-pytorch-cpu
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
|
||||
steps:
|
||||
|
||||
@@ -1304,11 +1304,7 @@ class DemoFusionSDXLPipeline(
|
||||
if isinstance(component, torch.nn.Module):
|
||||
if hasattr(component, "_hf_hook"):
|
||||
is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
|
||||
is_sequential_cpu_offload = (
|
||||
isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
|
||||
or hasattr(component._hf_hook, "hooks")
|
||||
and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
|
||||
)
|
||||
is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
|
||||
logger.info(
|
||||
"Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
|
||||
)
|
||||
|
||||
@@ -369,11 +369,7 @@ class LoraLoaderMixin:
|
||||
if not is_model_cpu_offload:
|
||||
is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
|
||||
if not is_sequential_cpu_offload:
|
||||
is_sequential_cpu_offload = (
|
||||
isinstance(component._hf_hook, AlignDevicesHook)
|
||||
or hasattr(component._hf_hook, "hooks")
|
||||
and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
|
||||
)
|
||||
is_sequential_cpu_offload = isinstance(component._hf_hook, AlignDevicesHook)
|
||||
|
||||
logger.info(
|
||||
"Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
|
||||
|
||||
@@ -423,11 +423,7 @@ class TextualInversionLoaderMixin:
|
||||
if isinstance(component, nn.Module):
|
||||
if hasattr(component, "_hf_hook"):
|
||||
is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
|
||||
is_sequential_cpu_offload = (
|
||||
isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
|
||||
or hasattr(component._hf_hook, "hooks")
|
||||
and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
|
||||
)
|
||||
is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
|
||||
logger.info(
|
||||
"Accelerate hooks detected. Since you have called `load_textual_inversion()`, the previous hooks will be first removed. Then the textual inversion parameters will be loaded and the hooks will be applied again."
|
||||
)
|
||||
|
||||
@@ -359,11 +359,7 @@ class UNet2DConditionLoadersMixin:
|
||||
for _, component in _pipeline.components.items():
|
||||
if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
|
||||
is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
|
||||
is_sequential_cpu_offload = (
|
||||
isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
|
||||
or hasattr(component._hf_hook, "hooks")
|
||||
and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
|
||||
)
|
||||
is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
|
||||
|
||||
logger.info(
|
||||
"Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
|
||||
|
||||
@@ -227,9 +227,6 @@ class DiTPipeline(DiffusionPipeline):
|
||||
if output_type == "pil":
|
||||
samples = self.numpy_to_pil(samples)
|
||||
|
||||
# Offload all models
|
||||
self.maybe_free_model_hooks()
|
||||
|
||||
if not return_dict:
|
||||
return (samples,)
|
||||
|
||||
|
||||
@@ -376,11 +376,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
|
||||
return False
|
||||
|
||||
return hasattr(module, "_hf_hook") and (
|
||||
isinstance(module._hf_hook, accelerate.hooks.AlignDevicesHook)
|
||||
or hasattr(module._hf_hook, "hooks")
|
||||
and isinstance(module._hf_hook.hooks[0], accelerate.hooks.AlignDevicesHook)
|
||||
)
|
||||
return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.AlignDevicesHook)
|
||||
|
||||
def module_is_offloaded(module):
|
||||
if not is_accelerate_available() or is_accelerate_version("<", "0.17.0.dev0"):
|
||||
@@ -1009,7 +1005,8 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
"""
|
||||
for _, model in self.components.items():
|
||||
if isinstance(model, torch.nn.Module) and hasattr(model, "_hf_hook"):
|
||||
accelerate.hooks.remove_hook_from_module(model, recurse=True)
|
||||
is_sequential_cpu_offload = isinstance(getattr(model, "_hf_hook"), accelerate.hooks.AlignDevicesHook)
|
||||
accelerate.hooks.remove_hook_from_module(model, recurse=is_sequential_cpu_offload)
|
||||
self._all_hooks = []
|
||||
|
||||
def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
|
||||
|
||||
+8
-87
@@ -172,7 +172,6 @@ class StableDiffusionInstructPix2PixPipeline(
|
||||
prompt_embeds: Optional[torch.FloatTensor] = None,
|
||||
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
||||
ip_adapter_image: Optional[PipelineImageInput] = None,
|
||||
ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
|
||||
output_type: Optional[str] = "pil",
|
||||
return_dict: bool = True,
|
||||
callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
|
||||
@@ -297,8 +296,6 @@ class StableDiffusionInstructPix2PixPipeline(
|
||||
negative_prompt,
|
||||
prompt_embeds,
|
||||
negative_prompt_embeds,
|
||||
ip_adapter_image,
|
||||
ip_adapter_image_embeds,
|
||||
callback_on_step_end_tensor_inputs,
|
||||
)
|
||||
self._guidance_scale = guidance_scale
|
||||
@@ -306,6 +303,14 @@ class StableDiffusionInstructPix2PixPipeline(
|
||||
|
||||
device = self._execution_device
|
||||
|
||||
if ip_adapter_image is not None:
|
||||
output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
|
||||
image_embeds, negative_image_embeds = self.encode_image(
|
||||
ip_adapter_image, device, num_images_per_prompt, output_hidden_state
|
||||
)
|
||||
if self.do_classifier_free_guidance:
|
||||
image_embeds = torch.cat([image_embeds, negative_image_embeds, negative_image_embeds])
|
||||
|
||||
if image is None:
|
||||
raise ValueError("`image` input cannot be undefined.")
|
||||
|
||||
@@ -330,14 +335,6 @@ class StableDiffusionInstructPix2PixPipeline(
|
||||
negative_prompt_embeds=negative_prompt_embeds,
|
||||
)
|
||||
|
||||
if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
|
||||
image_embeds = self.prepare_ip_adapter_image_embeds(
|
||||
ip_adapter_image,
|
||||
ip_adapter_image_embeds,
|
||||
device,
|
||||
batch_size * num_images_per_prompt,
|
||||
self.do_classifier_free_guidance,
|
||||
)
|
||||
# 3. Preprocess image
|
||||
image = self.image_processor.preprocess(image)
|
||||
|
||||
@@ -638,65 +635,6 @@ class StableDiffusionInstructPix2PixPipeline(
|
||||
|
||||
return image_embeds, uncond_image_embeds
|
||||
|
||||
def prepare_ip_adapter_image_embeds(
|
||||
self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
|
||||
):
|
||||
if ip_adapter_image_embeds is None:
|
||||
if not isinstance(ip_adapter_image, list):
|
||||
ip_adapter_image = [ip_adapter_image]
|
||||
|
||||
if len(ip_adapter_image) != len(self.unet.encoder_hid_proj.image_projection_layers):
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
|
||||
)
|
||||
|
||||
image_embeds = []
|
||||
for single_ip_adapter_image, image_proj_layer in zip(
|
||||
ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
|
||||
):
|
||||
output_hidden_state = not isinstance(image_proj_layer, ImageProjection)
|
||||
single_image_embeds, single_negative_image_embeds = self.encode_image(
|
||||
single_ip_adapter_image, device, 1, output_hidden_state
|
||||
)
|
||||
single_image_embeds = torch.stack([single_image_embeds] * num_images_per_prompt, dim=0)
|
||||
single_negative_image_embeds = torch.stack(
|
||||
[single_negative_image_embeds] * num_images_per_prompt, dim=0
|
||||
)
|
||||
|
||||
if do_classifier_free_guidance:
|
||||
single_image_embeds = torch.cat(
|
||||
[single_image_embeds, single_negative_image_embeds, single_negative_image_embeds]
|
||||
)
|
||||
single_image_embeds = single_image_embeds.to(device)
|
||||
|
||||
image_embeds.append(single_image_embeds)
|
||||
else:
|
||||
repeat_dims = [1]
|
||||
image_embeds = []
|
||||
for single_image_embeds in ip_adapter_image_embeds:
|
||||
if do_classifier_free_guidance:
|
||||
(
|
||||
single_image_embeds,
|
||||
single_negative_image_embeds,
|
||||
single_negative_image_embeds,
|
||||
) = single_image_embeds.chunk(3)
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
single_negative_image_embeds = single_negative_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
|
||||
)
|
||||
single_image_embeds = torch.cat(
|
||||
[single_image_embeds, single_negative_image_embeds, single_negative_image_embeds]
|
||||
)
|
||||
else:
|
||||
single_image_embeds = single_image_embeds.repeat(
|
||||
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
|
||||
)
|
||||
image_embeds.append(single_image_embeds)
|
||||
|
||||
return image_embeds
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker
|
||||
def run_safety_checker(self, image, device, dtype):
|
||||
if self.safety_checker is None:
|
||||
@@ -749,8 +687,6 @@ class StableDiffusionInstructPix2PixPipeline(
|
||||
negative_prompt=None,
|
||||
prompt_embeds=None,
|
||||
negative_prompt_embeds=None,
|
||||
ip_adapter_image=None,
|
||||
ip_adapter_image_embeds=None,
|
||||
callback_on_step_end_tensor_inputs=None,
|
||||
):
|
||||
if callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0):
|
||||
@@ -792,21 +728,6 @@ class StableDiffusionInstructPix2PixPipeline(
|
||||
f" {negative_prompt_embeds.shape}."
|
||||
)
|
||||
|
||||
if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
|
||||
raise ValueError(
|
||||
"Provide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image` and `ip_adapter_image_embeds` defined."
|
||||
)
|
||||
|
||||
if ip_adapter_image_embeds is not None:
|
||||
if not isinstance(ip_adapter_image_embeds, list):
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
|
||||
)
|
||||
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
|
||||
raise ValueError(
|
||||
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
|
||||
)
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
||||
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
||||
shape = (
|
||||
|
||||
+1
@@ -436,6 +436,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
|
||||
extra_step_kwargs["generator"] = generator
|
||||
return extra_step_kwargs
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_instruct_pix2pix.StableDiffusionInstructPix2PixPipeline.check_inputs
|
||||
def check_inputs(
|
||||
self,
|
||||
prompt,
|
||||
|
||||
@@ -324,6 +324,10 @@ class PixArtAlphaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
def test_inference_batch_single_identical(self):
|
||||
self._test_inference_batch_single_identical(expected_max_diff=1e-3)
|
||||
|
||||
# PixArt transformer model does not work with sequential offload so skip it for now
|
||||
def test_sequential_offload_forward_pass_twice(self):
|
||||
pass
|
||||
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
|
||||
@@ -308,6 +308,10 @@ class PixArtSigmaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
def test_inference_batch_single_identical(self):
|
||||
self._test_inference_batch_single_identical(expected_max_diff=1e-3)
|
||||
|
||||
# PixArt transformer model does not work with sequential offload so skip it for now
|
||||
def test_sequential_offload_forward_pass_twice(self):
|
||||
pass
|
||||
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
|
||||
@@ -1257,8 +1257,8 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
|
||||
|
||||
def test_download_from_hub(self):
|
||||
ckpt_paths = [
|
||||
"https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors",
|
||||
"https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix.safetensors",
|
||||
"https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt",
|
||||
"https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix_base.ckpt",
|
||||
]
|
||||
|
||||
for ckpt_path in ckpt_paths:
|
||||
@@ -1271,7 +1271,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
|
||||
assert image_out.shape == (512, 512, 3)
|
||||
|
||||
def test_download_local(self):
|
||||
ckpt_filename = hf_hub_download("runwayml/stable-diffusion-v1-5", filename="v1-5-pruned-emaonly.safetensors")
|
||||
ckpt_filename = hf_hub_download("runwayml/stable-diffusion-v1-5", filename="v1-5-pruned-emaonly.ckpt")
|
||||
config_filename = hf_hub_download("runwayml/stable-diffusion-v1-5", filename="v1-inference.yaml")
|
||||
|
||||
pipe = StableDiffusionPipeline.from_single_file(
|
||||
@@ -1285,7 +1285,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
|
||||
assert image_out.shape == (512, 512, 3)
|
||||
|
||||
def test_download_ckpt_diff_format_is_same(self):
|
||||
ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
|
||||
ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt"
|
||||
|
||||
sf_pipe = StableDiffusionPipeline.from_single_file(ckpt_path)
|
||||
sf_pipe.scheduler = DDIMScheduler.from_config(sf_pipe.scheduler.config)
|
||||
@@ -1310,7 +1310,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
|
||||
def test_single_file_component_configs(self):
|
||||
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
|
||||
|
||||
ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
|
||||
ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt"
|
||||
single_file_pipe = StableDiffusionPipeline.from_single_file(ckpt_path, load_safety_checker=True)
|
||||
|
||||
for param_name, param_value in single_file_pipe.text_encoder.config.to_dict().items():
|
||||
|
||||
@@ -1360,8 +1360,6 @@ class PipelineTesterMixin:
|
||||
reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
|
||||
)
|
||||
def test_sequential_cpu_offload_forward_pass(self, expected_max_diff=1e-4):
|
||||
import accelerate
|
||||
|
||||
components = self.get_dummy_components()
|
||||
pipe = self.pipeline_class(**components)
|
||||
for component in pipe.components.values():
|
||||
@@ -1375,7 +1373,6 @@ class PipelineTesterMixin:
|
||||
output_without_offload = pipe(**inputs)[0]
|
||||
|
||||
pipe.enable_sequential_cpu_offload()
|
||||
assert pipe._execution_device.type == pipe._offload_device.type
|
||||
|
||||
inputs = self.get_dummy_inputs(generator_device)
|
||||
output_with_offload = pipe(**inputs)[0]
|
||||
@@ -1383,48 +1380,11 @@ class PipelineTesterMixin:
|
||||
max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
|
||||
self.assertLess(max_diff, expected_max_diff, "CPU offloading should not affect the inference results")
|
||||
|
||||
# make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
|
||||
offloaded_modules = {
|
||||
k: v
|
||||
for k, v in pipe.components.items()
|
||||
if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
|
||||
}
|
||||
# 1. all offloaded modules should be saved to cpu and moved to meta device
|
||||
self.assertTrue(
|
||||
all(v.device.type == "meta" for v in offloaded_modules.values()),
|
||||
f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'meta']}",
|
||||
)
|
||||
# 2. all offloaded modules should have hook installed
|
||||
self.assertTrue(
|
||||
all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
|
||||
f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
|
||||
)
|
||||
# 3. all offloaded modules should have correct hooks installed, should be either one of these two
|
||||
# - `AlignDevicesHook`
|
||||
# - a `SequentialHook` that contains `AlignDevicesHook`
|
||||
offloaded_modules_with_incorrect_hooks = {}
|
||||
for k, v in offloaded_modules.items():
|
||||
if hasattr(v, "_hf_hook"):
|
||||
if isinstance(v._hf_hook, accelerate.hooks.SequentialHook):
|
||||
# if it is a `SequentialHook`, we loop through its `hooks` attribute to check if it only contains `AlignDevicesHook`
|
||||
for hook in v._hf_hook.hooks:
|
||||
if not isinstance(hook, accelerate.hooks.AlignDevicesHook):
|
||||
offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook.hooks[0])
|
||||
elif not isinstance(v._hf_hook, accelerate.hooks.AlignDevicesHook):
|
||||
offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)
|
||||
|
||||
self.assertTrue(
|
||||
len(offloaded_modules_with_incorrect_hooks) == 0,
|
||||
f"Not installed correct hook: {offloaded_modules_with_incorrect_hooks}",
|
||||
)
|
||||
|
||||
@unittest.skipIf(
|
||||
torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.17.0"),
|
||||
reason="CPU offload is only available with CUDA and `accelerate v0.17.0` or higher",
|
||||
)
|
||||
def test_model_cpu_offload_forward_pass(self, expected_max_diff=2e-4):
|
||||
import accelerate
|
||||
|
||||
generator_device = "cpu"
|
||||
components = self.get_dummy_components()
|
||||
pipe = self.pipeline_class(**components)
|
||||
@@ -1440,39 +1400,19 @@ class PipelineTesterMixin:
|
||||
output_without_offload = pipe(**inputs)[0]
|
||||
|
||||
pipe.enable_model_cpu_offload()
|
||||
assert pipe._execution_device.type == pipe._offload_device.type
|
||||
|
||||
inputs = self.get_dummy_inputs(generator_device)
|
||||
output_with_offload = pipe(**inputs)[0]
|
||||
|
||||
max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
|
||||
self.assertLess(max_diff, expected_max_diff, "CPU offloading should not affect the inference results")
|
||||
|
||||
# make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
|
||||
offloaded_modules = {
|
||||
k: v
|
||||
offloaded_modules = [
|
||||
v
|
||||
for k, v in pipe.components.items()
|
||||
if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
|
||||
}
|
||||
# 1. check if all offloaded modules are saved to cpu
|
||||
self.assertTrue(
|
||||
all(v.device.type == "cpu" for v in offloaded_modules.values()),
|
||||
f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'cpu']}",
|
||||
)
|
||||
# 2. check if all offloaded modules have hooks installed
|
||||
self.assertTrue(
|
||||
all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
|
||||
f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
|
||||
)
|
||||
# 3. check if all offloaded modules have correct type of hooks installed, should be `CpuOffload`
|
||||
offloaded_modules_with_incorrect_hooks = {}
|
||||
for k, v in offloaded_modules.items():
|
||||
if hasattr(v, "_hf_hook") and not isinstance(v._hf_hook, accelerate.hooks.CpuOffload):
|
||||
offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)
|
||||
|
||||
self.assertTrue(
|
||||
len(offloaded_modules_with_incorrect_hooks) == 0,
|
||||
f"Not installed correct hook: {offloaded_modules_with_incorrect_hooks}",
|
||||
]
|
||||
(
|
||||
self.assertTrue(all(v.device.type == "cpu" for v in offloaded_modules)),
|
||||
f"Not offloaded: {[v for v in offloaded_modules if v.device.type != 'cpu']}",
|
||||
)
|
||||
|
||||
@unittest.skipIf(
|
||||
@@ -1504,24 +1444,16 @@ class PipelineTesterMixin:
|
||||
self.assertLess(
|
||||
max_diff, expected_max_diff, "running CPU offloading 2nd time should not affect the inference results"
|
||||
)
|
||||
|
||||
# make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
|
||||
offloaded_modules = {
|
||||
k: v
|
||||
for k, v in pipe.components.items()
|
||||
if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
|
||||
}
|
||||
# 1. check if all offloaded modules are saved to cpu
|
||||
self.assertTrue(
|
||||
all(v.device.type == "cpu" for v in offloaded_modules.values()),
|
||||
f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'cpu']}",
|
||||
)
|
||||
# 2. check if all offloaded modules have hooks installed
|
||||
self.assertTrue(
|
||||
all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
|
||||
f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
|
||||
)
|
||||
# 3. check if all offloaded modules have correct type of hooks installed, should be `CpuOffload`
|
||||
|
||||
offloaded_modules_with_incorrect_hooks = {}
|
||||
for k, v in offloaded_modules.items():
|
||||
if hasattr(v, "_hf_hook") and not isinstance(v._hf_hook, accelerate.hooks.CpuOffload):
|
||||
@@ -1561,36 +1493,19 @@ class PipelineTesterMixin:
|
||||
self.assertLess(
|
||||
max_diff, expected_max_diff, "running sequential offloading second time should have the inference results"
|
||||
)
|
||||
|
||||
# make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
|
||||
offloaded_modules = {
|
||||
k: v
|
||||
for k, v in pipe.components.items()
|
||||
if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
|
||||
}
|
||||
# 1. check if all offloaded modules are moved to meta device
|
||||
self.assertTrue(
|
||||
all(v.device.type == "meta" for v in offloaded_modules.values()),
|
||||
f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'meta']}",
|
||||
)
|
||||
# 2. check if all offloaded modules have hook installed
|
||||
self.assertTrue(
|
||||
all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
|
||||
f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
|
||||
)
|
||||
# 3. check if all offloaded modules have correct hooks installed, should be either one of these two
|
||||
# - `AlignDevicesHook`
|
||||
# - a `SequentialHook` that contains `AlignDevicesHook`
|
||||
offloaded_modules_with_incorrect_hooks = {}
|
||||
for k, v in offloaded_modules.items():
|
||||
if hasattr(v, "_hf_hook"):
|
||||
if isinstance(v._hf_hook, accelerate.hooks.SequentialHook):
|
||||
# if it is a `SequentialHook`, we loop through its `hooks` attribute to check if it only contains `AlignDevicesHook`
|
||||
for hook in v._hf_hook.hooks:
|
||||
if not isinstance(hook, accelerate.hooks.AlignDevicesHook):
|
||||
offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook.hooks[0])
|
||||
elif not isinstance(v._hf_hook, accelerate.hooks.AlignDevicesHook):
|
||||
offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)
|
||||
if hasattr(v, "_hf_hook") and not isinstance(v._hf_hook, accelerate.hooks.AlignDevicesHook):
|
||||
offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)
|
||||
|
||||
self.assertTrue(
|
||||
len(offloaded_modules_with_incorrect_hooks) == 0,
|
||||
|
||||
Reference in New Issue
Block a user