Compare commits

..

4 Commits

Author SHA1 Message Date
Sayak Paul a75bc61b71 Merge branch 'main' into fix-model-device-map 2024-05-01 08:16:12 +05:30
sayakpaul 8de27500da Empty-Commit 2024-04-30 20:28:42 +05:30
sayakpaul 5575303be0 remove patch file 2024-04-30 20:06:24 +05:30
sayakpaul 235ff8808f fix: device module tests 2024-04-30 19:49:16 +05:30
14 changed files with 61 additions and 238 deletions
+22 -22
View File
@@ -19,7 +19,7 @@ env:
jobs:
setup_torch_cuda_pipeline_matrix:
name: Setup Torch Pipelines Matrix
runs-on: diffusers/diffusers-pytorch-cpu
runs-on: ubuntu-latest
outputs:
pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
steps:
@@ -67,19 +67,19 @@ jobs:
fetch-depth: 2
- name: NVIDIA-SMI
run: nvidia-smi
- name: Install dependencies
run: |
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
python -m uv pip install -e [quality,test]
python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
python -m uv pip install pytest-reportlog
- name: Environment
run: |
python utils/print_env.py
- name: Nightly PyTorch CUDA checkpoint (pipelines) tests
- name: Nightly PyTorch CUDA checkpoint (pipelines) tests
env:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
@@ -88,9 +88,9 @@ jobs:
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-s -v -k "not Flax and not Onnx" \
--make-reports=tests_pipeline_${{ matrix.module }}_cuda \
--report-log=tests_pipeline_${{ matrix.module }}_cuda.log \
--report-log=tests_pipeline_${{ matrix.module }}_cuda.log \
tests/pipelines/${{ matrix.module }}
- name: Failure short reports
if: ${{ failure() }}
run: |
@@ -103,7 +103,7 @@ jobs:
with:
name: pipeline_${{ matrix.module }}_test_reports
path: reports
- name: Generate Report and Notify Channel
if: always()
run: |
@@ -139,7 +139,7 @@ jobs:
run: python utils/print_env.py
- name: Run nightly PyTorch CUDA tests for non-pipeline modules
if: ${{ matrix.module != 'examples'}}
if: ${{ matrix.module != 'examples'}}
env:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
@@ -148,7 +148,7 @@ jobs:
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-s -v -k "not Flax and not Onnx" \
--make-reports=tests_torch_${{ matrix.module }}_cuda \
--report-log=tests_torch_${{ matrix.module }}_cuda.log \
--report-log=tests_torch_${{ matrix.module }}_cuda.log \
tests/${{ matrix.module }}
- name: Run nightly example tests with Torch
@@ -161,13 +161,13 @@ jobs:
python -m uv pip install peft@git+https://github.com/huggingface/peft.git
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-s -v --make-reports=examples_torch_cuda \
--report-log=examples_torch_cuda.log \
--report-log=examples_torch_cuda.log \
examples/
- name: Failure short reports
if: ${{ failure() }}
run: |
cat reports/tests_torch_${{ matrix.module }}_cuda_stats.txt
cat reports/tests_torch_${{ matrix.module }}_cuda_stats.txt
cat reports/tests_torch_${{ matrix.module }}_cuda_failures_short.txt
- name: Test suite reports artifacts
@@ -218,13 +218,13 @@ jobs:
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-s -v -k "not Flax and not Onnx" \
--make-reports=tests_torch_lora_cuda \
--report-log=tests_torch_lora_cuda.log \
--report-log=tests_torch_lora_cuda.log \
tests/lora
- name: Failure short reports
if: ${{ failure() }}
run: |
cat reports/tests_torch_lora_cuda_stats.txt
cat reports/tests_torch_lora_cuda_stats.txt
cat reports/tests_torch_lora_cuda_failures_short.txt
- name: Test suite reports artifacts
@@ -239,12 +239,12 @@ jobs:
run: |
pip install slack_sdk tabulate
python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
run_flax_tpu_tests:
name: Nightly Flax TPU Tests
runs-on: docker-tpu
if: github.event_name == 'schedule'
container:
image: diffusers/diffusers-flax-tpu
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --privileged
@@ -274,7 +274,7 @@ jobs:
python -m pytest -n 0 \
-s -v -k "Flax" \
--make-reports=tests_flax_tpu \
--report-log=tests_flax_tpu.log \
--report-log=tests_flax_tpu.log \
tests/
- name: Failure short reports
@@ -302,7 +302,7 @@ jobs:
container:
image: diffusers/diffusers-onnxruntime-cuda
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
@@ -321,7 +321,7 @@ jobs:
- name: Environment
run: python utils/print_env.py
- name: Run nightly ONNXRuntime CUDA tests
env:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
@@ -329,7 +329,7 @@ jobs:
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-s -v -k "Onnx" \
--make-reports=tests_onnx_cuda \
--report-log=tests_onnx_cuda.log \
--report-log=tests_onnx_cuda.log \
tests/
- name: Failure short reports
@@ -344,7 +344,7 @@ jobs:
with:
name: ${{ matrix.config.report }}_test_reports
path: reports
- name: Generate Report and Notify Channel
if: always()
run: |
+1 -1
View File
@@ -21,7 +21,7 @@ env:
jobs:
setup_torch_cuda_pipeline_matrix:
name: Setup Torch Pipelines CUDA Slow Tests Matrix
runs-on: diffusers/diffusers-pytorch-cpu
runs-on: ubuntu-latest
outputs:
pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
steps:
@@ -1304,11 +1304,7 @@ class DemoFusionSDXLPipeline(
if isinstance(component, torch.nn.Module):
if hasattr(component, "_hf_hook"):
is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
is_sequential_cpu_offload = (
isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
or hasattr(component._hf_hook, "hooks")
and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
)
is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
logger.info(
"Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
)
+1 -5
View File
@@ -369,11 +369,7 @@ class LoraLoaderMixin:
if not is_model_cpu_offload:
is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
if not is_sequential_cpu_offload:
is_sequential_cpu_offload = (
isinstance(component._hf_hook, AlignDevicesHook)
or hasattr(component._hf_hook, "hooks")
and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
)
is_sequential_cpu_offload = isinstance(component._hf_hook, AlignDevicesHook)
logger.info(
"Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
+1 -5
View File
@@ -423,11 +423,7 @@ class TextualInversionLoaderMixin:
if isinstance(component, nn.Module):
if hasattr(component, "_hf_hook"):
is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
is_sequential_cpu_offload = (
isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
or hasattr(component._hf_hook, "hooks")
and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
)
is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
logger.info(
"Accelerate hooks detected. Since you have called `load_textual_inversion()`, the previous hooks will be first removed. Then the textual inversion parameters will be loaded and the hooks will be applied again."
)
+1 -5
View File
@@ -359,11 +359,7 @@ class UNet2DConditionLoadersMixin:
for _, component in _pipeline.components.items():
if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
is_sequential_cpu_offload = (
isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
or hasattr(component._hf_hook, "hooks")
and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
)
is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
logger.info(
"Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
@@ -227,9 +227,6 @@ class DiTPipeline(DiffusionPipeline):
if output_type == "pil":
samples = self.numpy_to_pil(samples)
# Offload all models
self.maybe_free_model_hooks()
if not return_dict:
return (samples,)
+3 -6
View File
@@ -376,11 +376,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
return False
return hasattr(module, "_hf_hook") and (
isinstance(module._hf_hook, accelerate.hooks.AlignDevicesHook)
or hasattr(module._hf_hook, "hooks")
and isinstance(module._hf_hook.hooks[0], accelerate.hooks.AlignDevicesHook)
)
return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.AlignDevicesHook)
def module_is_offloaded(module):
if not is_accelerate_available() or is_accelerate_version("<", "0.17.0.dev0"):
@@ -1009,7 +1005,8 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
"""
for _, model in self.components.items():
if isinstance(model, torch.nn.Module) and hasattr(model, "_hf_hook"):
accelerate.hooks.remove_hook_from_module(model, recurse=True)
is_sequential_cpu_offload = isinstance(getattr(model, "_hf_hook"), accelerate.hooks.AlignDevicesHook)
accelerate.hooks.remove_hook_from_module(model, recurse=is_sequential_cpu_offload)
self._all_hooks = []
def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
@@ -172,7 +172,6 @@ class StableDiffusionInstructPix2PixPipeline(
prompt_embeds: Optional[torch.FloatTensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
ip_adapter_image: Optional[PipelineImageInput] = None,
ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
output_type: Optional[str] = "pil",
return_dict: bool = True,
callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
@@ -297,8 +296,6 @@ class StableDiffusionInstructPix2PixPipeline(
negative_prompt,
prompt_embeds,
negative_prompt_embeds,
ip_adapter_image,
ip_adapter_image_embeds,
callback_on_step_end_tensor_inputs,
)
self._guidance_scale = guidance_scale
@@ -306,6 +303,14 @@ class StableDiffusionInstructPix2PixPipeline(
device = self._execution_device
if ip_adapter_image is not None:
output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
image_embeds, negative_image_embeds = self.encode_image(
ip_adapter_image, device, num_images_per_prompt, output_hidden_state
)
if self.do_classifier_free_guidance:
image_embeds = torch.cat([image_embeds, negative_image_embeds, negative_image_embeds])
if image is None:
raise ValueError("`image` input cannot be undefined.")
@@ -330,14 +335,6 @@ class StableDiffusionInstructPix2PixPipeline(
negative_prompt_embeds=negative_prompt_embeds,
)
if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
image_embeds = self.prepare_ip_adapter_image_embeds(
ip_adapter_image,
ip_adapter_image_embeds,
device,
batch_size * num_images_per_prompt,
self.do_classifier_free_guidance,
)
# 3. Preprocess image
image = self.image_processor.preprocess(image)
@@ -638,65 +635,6 @@ class StableDiffusionInstructPix2PixPipeline(
return image_embeds, uncond_image_embeds
# Build the list of IP-Adapter image embeddings, one list entry per adapter.
# Two input paths are supported:
#   * `ip_adapter_image_embeds is None`: raw images in `ip_adapter_image` are encoded
#     with `self.encode_image`.
#   * otherwise: the precomputed tensors in `ip_adapter_image_embeds` are reused and
#     only repeated `num_images_per_prompt` times.
# Raises `ValueError` when the number of images differs from the number of
# `self.unet.encoder_hid_proj.image_projection_layers`.
# Returns the list `image_embeds`.
# NOTE(review): indentation is not preserved in this view, so the exact nesting of some
# statements (e.g. the `.to(device)` call) cannot be confirmed from here.
def prepare_ip_adapter_image_embeds(
self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
):
if ip_adapter_image_embeds is None:
# Accept a single image by wrapping it into a one-element list.
if not isinstance(ip_adapter_image, list):
ip_adapter_image = [ip_adapter_image]
# One image is required per image projection layer.
if len(ip_adapter_image) != len(self.unet.encoder_hid_proj.image_projection_layers):
raise ValueError(
f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
)
image_embeds = []
for single_ip_adapter_image, image_proj_layer in zip(
ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
):
# Hidden states are requested for every projection layer type except `ImageProjection`.
output_hidden_state = not isinstance(image_proj_layer, ImageProjection)
# The literal 1 is presumably `num_images_per_prompt` for `encode_image` — TODO confirm;
# the per-prompt duplication is done by the `torch.stack` calls below.
single_image_embeds, single_negative_image_embeds = self.encode_image(
single_ip_adapter_image, device, 1, output_hidden_state
)
# Duplicate along a new leading dimension, `num_images_per_prompt` copies each.
single_image_embeds = torch.stack([single_image_embeds] * num_images_per_prompt, dim=0)
single_negative_image_embeds = torch.stack(
[single_negative_image_embeds] * num_images_per_prompt, dim=0
)
# With classifier-free guidance the result has three parts in the order
# [image, negative, negative].
if do_classifier_free_guidance:
single_image_embeds = torch.cat(
[single_image_embeds, single_negative_image_embeds, single_negative_image_embeds]
)
single_image_embeds = single_image_embeds.to(device)
image_embeds.append(single_image_embeds)
# Precomputed-embeddings path (this `else` presumably pairs with the
# `ip_adapter_image_embeds is None` check above — TODO confirm).
else:
# Repeat factor applied to every non-leading dimension (i.e. leave them unchanged).
repeat_dims = [1]
image_embeds = []
for single_image_embeds in ip_adapter_image_embeds:
if do_classifier_free_guidance:
# Split into three chunks; the second and third are both bound to
# `single_negative_image_embeds`, so the third chunk's value is the one kept.
(
single_image_embeds,
single_negative_image_embeds,
single_negative_image_embeds,
) = single_image_embeds.chunk(3)
# Repeat `num_images_per_prompt` times along dim 0, once along all other dims.
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
single_negative_image_embeds = single_negative_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
)
# Re-assemble in the same [image, negative, negative] order as the encoding path.
single_image_embeds = torch.cat(
[single_image_embeds, single_negative_image_embeds, single_negative_image_embeds]
)
else:
# No guidance: only repeat along dim 0.
single_image_embeds = single_image_embeds.repeat(
num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
)
image_embeds.append(single_image_embeds)
return image_embeds
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker
def run_safety_checker(self, image, device, dtype):
if self.safety_checker is None:
@@ -749,8 +687,6 @@ class StableDiffusionInstructPix2PixPipeline(
negative_prompt=None,
prompt_embeds=None,
negative_prompt_embeds=None,
ip_adapter_image=None,
ip_adapter_image_embeds=None,
callback_on_step_end_tensor_inputs=None,
):
if callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0):
@@ -792,21 +728,6 @@ class StableDiffusionInstructPix2PixPipeline(
f" {negative_prompt_embeds.shape}."
)
if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
raise ValueError(
"Provide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image` and `ip_adapter_image_embeds` defined."
)
if ip_adapter_image_embeds is not None:
if not isinstance(ip_adapter_image_embeds, list):
raise ValueError(
f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
)
elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
raise ValueError(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
shape = (
@@ -436,6 +436,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
extra_step_kwargs["generator"] = generator
return extra_step_kwargs
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_instruct_pix2pix.StableDiffusionInstructPix2PixPipeline.check_inputs
def check_inputs(
self,
prompt,
@@ -324,6 +324,10 @@ class PixArtAlphaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
# Delegates to `self._test_inference_batch_single_identical`, passing 1e-3 as
# `expected_max_diff`.
def test_inference_batch_single_identical(self):
self._test_inference_batch_single_identical(expected_max_diff=1e-3)
# PixArt transformer model does not work with sequential offload so skip it for now.
# NOTE(review): the empty body presumably overrides a same-named test inherited from
# `PipelineTesterMixin` — confirm; as written the test does nothing rather than being
# marked as skipped.
def test_sequential_offload_forward_pass_twice(self):
pass
@slow
@require_torch_gpu
@@ -308,6 +308,10 @@ class PixArtSigmaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
# Delegates to `self._test_inference_batch_single_identical`, passing 1e-3 as
# `expected_max_diff`.
def test_inference_batch_single_identical(self):
self._test_inference_batch_single_identical(expected_max_diff=1e-3)
# PixArt transformer model does not work with sequential offload so skip it for now.
# NOTE(review): the empty body presumably overrides a same-named test inherited from
# `PipelineTesterMixin` — confirm; as written the test does nothing rather than being
# marked as skipped.
def test_sequential_offload_forward_pass_twice(self):
pass
@slow
@require_torch_gpu
@@ -1257,8 +1257,8 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
def test_download_from_hub(self):
ckpt_paths = [
"https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors",
"https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix.safetensors",
"https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt",
"https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix_base.ckpt",
]
for ckpt_path in ckpt_paths:
@@ -1271,7 +1271,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
assert image_out.shape == (512, 512, 3)
def test_download_local(self):
ckpt_filename = hf_hub_download("runwayml/stable-diffusion-v1-5", filename="v1-5-pruned-emaonly.safetensors")
ckpt_filename = hf_hub_download("runwayml/stable-diffusion-v1-5", filename="v1-5-pruned-emaonly.ckpt")
config_filename = hf_hub_download("runwayml/stable-diffusion-v1-5", filename="v1-inference.yaml")
pipe = StableDiffusionPipeline.from_single_file(
@@ -1285,7 +1285,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
assert image_out.shape == (512, 512, 3)
def test_download_ckpt_diff_format_is_same(self):
ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt"
sf_pipe = StableDiffusionPipeline.from_single_file(ckpt_path)
sf_pipe.scheduler = DDIMScheduler.from_config(sf_pipe.scheduler.config)
@@ -1310,7 +1310,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
def test_single_file_component_configs(self):
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt"
single_file_pipe = StableDiffusionPipeline.from_single_file(ckpt_path, load_safety_checker=True)
for param_name, param_value in single_file_pipe.text_encoder.config.to_dict().items():
+9 -94
View File
@@ -1360,8 +1360,6 @@ class PipelineTesterMixin:
reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
)
def test_sequential_cpu_offload_forward_pass(self, expected_max_diff=1e-4):
import accelerate
components = self.get_dummy_components()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
@@ -1375,7 +1373,6 @@ class PipelineTesterMixin:
output_without_offload = pipe(**inputs)[0]
pipe.enable_sequential_cpu_offload()
assert pipe._execution_device.type == pipe._offload_device.type
inputs = self.get_dummy_inputs(generator_device)
output_with_offload = pipe(**inputs)[0]
@@ -1383,48 +1380,11 @@ class PipelineTesterMixin:
max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
self.assertLess(max_diff, expected_max_diff, "CPU offloading should not affect the inference results")
# make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
offloaded_modules = {
k: v
for k, v in pipe.components.items()
if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
}
# 1. all offloaded modules should be saved to cpu and moved to meta device
self.assertTrue(
all(v.device.type == "meta" for v in offloaded_modules.values()),
f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'meta']}",
)
# 2. all offloaded modules should have hook installed
self.assertTrue(
all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
)
# 3. all offloaded modules should have correct hooks installed, should be either one of these two
# - `AlignDevicesHook`
# - a `SequentialHook` that contains `AlignDevicesHook`
offloaded_modules_with_incorrect_hooks = {}
for k, v in offloaded_modules.items():
if hasattr(v, "_hf_hook"):
if isinstance(v._hf_hook, accelerate.hooks.SequentialHook):
# if it is a `SequentialHook`, we loop through its `hooks` attribute to check if it only contains `AlignDevicesHook`
for hook in v._hf_hook.hooks:
if not isinstance(hook, accelerate.hooks.AlignDevicesHook):
offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook.hooks[0])
elif not isinstance(v._hf_hook, accelerate.hooks.AlignDevicesHook):
offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)
self.assertTrue(
len(offloaded_modules_with_incorrect_hooks) == 0,
f"Not installed correct hook: {offloaded_modules_with_incorrect_hooks}",
)
@unittest.skipIf(
torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.17.0"),
reason="CPU offload is only available with CUDA and `accelerate v0.17.0` or higher",
)
def test_model_cpu_offload_forward_pass(self, expected_max_diff=2e-4):
import accelerate
generator_device = "cpu"
components = self.get_dummy_components()
pipe = self.pipeline_class(**components)
@@ -1440,39 +1400,19 @@ class PipelineTesterMixin:
output_without_offload = pipe(**inputs)[0]
pipe.enable_model_cpu_offload()
assert pipe._execution_device.type == pipe._offload_device.type
inputs = self.get_dummy_inputs(generator_device)
output_with_offload = pipe(**inputs)[0]
max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
self.assertLess(max_diff, expected_max_diff, "CPU offloading should not affect the inference results")
# make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
offloaded_modules = {
k: v
offloaded_modules = [
v
for k, v in pipe.components.items()
if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
}
# 1. check if all offloaded modules are saved to cpu
self.assertTrue(
all(v.device.type == "cpu" for v in offloaded_modules.values()),
f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'cpu']}",
)
# 2. check if all offloaded modules have hooks installed
self.assertTrue(
all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
)
# 3. check if all offloaded modules have correct type of hooks installed, should be `CpuOffload`
offloaded_modules_with_incorrect_hooks = {}
for k, v in offloaded_modules.items():
if hasattr(v, "_hf_hook") and not isinstance(v._hf_hook, accelerate.hooks.CpuOffload):
offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)
self.assertTrue(
len(offloaded_modules_with_incorrect_hooks) == 0,
f"Not installed correct hook: {offloaded_modules_with_incorrect_hooks}",
]
(
self.assertTrue(all(v.device.type == "cpu" for v in offloaded_modules)),
f"Not offloaded: {[v for v in offloaded_modules if v.device.type != 'cpu']}",
)
@unittest.skipIf(
@@ -1504,24 +1444,16 @@ class PipelineTesterMixin:
self.assertLess(
max_diff, expected_max_diff, "running CPU offloading 2nd time should not affect the inference results"
)
# make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
offloaded_modules = {
k: v
for k, v in pipe.components.items()
if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
}
# 1. check if all offloaded modules are saved to cpu
self.assertTrue(
all(v.device.type == "cpu" for v in offloaded_modules.values()),
f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'cpu']}",
)
# 2. check if all offloaded modules have hooks installed
self.assertTrue(
all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
)
# 3. check if all offloaded modules have correct type of hooks installed, should be `CpuOffload`
offloaded_modules_with_incorrect_hooks = {}
for k, v in offloaded_modules.items():
if hasattr(v, "_hf_hook") and not isinstance(v._hf_hook, accelerate.hooks.CpuOffload):
@@ -1561,36 +1493,19 @@ class PipelineTesterMixin:
self.assertLess(
max_diff, expected_max_diff, "running sequential offloading second time should have the inference results"
)
# make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
offloaded_modules = {
k: v
for k, v in pipe.components.items()
if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
}
# 1. check if all offloaded modules are moved to meta device
self.assertTrue(
all(v.device.type == "meta" for v in offloaded_modules.values()),
f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'meta']}",
)
# 2. check if all offloaded modules have hook installed
self.assertTrue(
all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
)
# 3. check if all offloaded modules have correct hooks installed, should be either one of these two
# - `AlignDevicesHook`
# - a `SequentialHook` that contains `AlignDevicesHook`
offloaded_modules_with_incorrect_hooks = {}
for k, v in offloaded_modules.items():
if hasattr(v, "_hf_hook"):
if isinstance(v._hf_hook, accelerate.hooks.SequentialHook):
# if it is a `SequentialHook`, we loop through its `hooks` attribute to check if it only contains `AlignDevicesHook`
for hook in v._hf_hook.hooks:
if not isinstance(hook, accelerate.hooks.AlignDevicesHook):
offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook.hooks[0])
elif not isinstance(v._hf_hook, accelerate.hooks.AlignDevicesHook):
offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)
if hasattr(v, "_hf_hook") and not isinstance(v._hf_hook, accelerate.hooks.AlignDevicesHook):
offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)
self.assertTrue(
len(offloaded_modules_with_incorrect_hooks) == 0,