Merge branch 'main' into fix-model-device-map

Empty-Commit
remove patch file
2024-05-01 08:16:12 +05:30 · 2024-04-30 20:28:42 +05:30 · 2024-04-30 20:06:24 +05:30 · 2024-04-30 19:49:16 +05:30
14 changed files with 61 additions and 238 deletions
@@ -19,7 +19,7 @@ env:
 jobs:
  setup_torch_cuda_pipeline_matrix:
    name: Setup Torch Pipelines Matrix
-    runs-on: diffusers/diffusers-pytorch-cpu
+    runs-on: ubuntu-latest
    outputs:
      pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
    steps:
@@ -67,19 +67,19 @@ jobs:
          fetch-depth: 2
      - name: NVIDIA-SMI
        run: nvidia-smi
-
+      
      - name: Install dependencies
        run: |
          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
          python -m uv pip install -e [quality,test]
          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
          python -m uv pip install pytest-reportlog
-
+      
      - name: Environment
        run: |
          python utils/print_env.py
-
-      - name: Nightly PyTorch CUDA checkpoint (pipelines) tests
+      
+      - name: Nightly PyTorch CUDA checkpoint (pipelines) tests 
        env:
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
@@ -88,9 +88,9 @@ jobs:
          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
            -s -v -k "not Flax and not Onnx" \
            --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
-            --report-log=tests_pipeline_${{ matrix.module }}_cuda.log \
+            --report-log=tests_pipeline_${{ matrix.module }}_cuda.log \
            tests/pipelines/${{ matrix.module }}
-
+      
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
@@ -103,7 +103,7 @@ jobs:
        with:
          name: pipeline_${{ matrix.module }}_test_reports
          path: reports
-
+      
      - name: Generate Report and Notify Channel
        if: always()
        run: |
@@ -139,7 +139,7 @@ jobs:
      run: python utils/print_env.py

    - name: Run nightly PyTorch CUDA tests for non-pipeline modules
-      if: ${{ matrix.module != 'examples'}}
+      if: ${{ matrix.module != 'examples'}} 
      env:
        HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
        # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
@@ -148,7 +148,7 @@ jobs:
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
          -s -v -k "not Flax and not Onnx" \
          --make-reports=tests_torch_${{ matrix.module }}_cuda \
-          --report-log=tests_torch_${{ matrix.module }}_cuda.log \
+          --report-log=tests_torch_${{ matrix.module }}_cuda.log \
          tests/${{ matrix.module }}

    - name: Run nightly example tests with Torch
@@ -161,13 +161,13 @@ jobs:
        python -m uv pip install peft@git+https://github.com/huggingface/peft.git
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
          -s -v --make-reports=examples_torch_cuda \
-          --report-log=examples_torch_cuda.log \
+          --report-log=examples_torch_cuda.log \
          examples/

    - name: Failure short reports
      if: ${{ failure() }}
      run: |
-        cat reports/tests_torch_${{ matrix.module }}_cuda_stats.txt
+        cat reports/tests_torch_${{ matrix.module }}_cuda_stats.txt 
        cat reports/tests_torch_${{ matrix.module }}_cuda_failures_short.txt

    - name: Test suite reports artifacts
@@ -218,13 +218,13 @@ jobs:
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
          -s -v -k "not Flax and not Onnx" \
          --make-reports=tests_torch_lora_cuda \
-          --report-log=tests_torch_lora_cuda.log \
+          --report-log=tests_torch_lora_cuda.log \
          tests/lora
-
+    
    - name: Failure short reports
      if: ${{ failure() }}
      run: |
-        cat reports/tests_torch_lora_cuda_stats.txt
+        cat reports/tests_torch_lora_cuda_stats.txt 
        cat reports/tests_torch_lora_cuda_failures_short.txt

    - name: Test suite reports artifacts
@@ -239,12 +239,12 @@ jobs:
      run: |
        pip install slack_sdk tabulate
        python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
-
+  
  run_flax_tpu_tests:
    name: Nightly Flax TPU Tests
    runs-on: docker-tpu
    if: github.event_name == 'schedule'
-
+    
    container:
      image: diffusers/diffusers-flax-tpu
      options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --privileged
@@ -274,7 +274,7 @@ jobs:
        python -m pytest -n 0 \
          -s -v -k "Flax" \
          --make-reports=tests_flax_tpu \
-          --report-log=tests_flax_tpu.log \
+          --report-log=tests_flax_tpu.log \
          tests/

    - name: Failure short reports
@@ -302,7 +302,7 @@ jobs:
    container:
      image: diffusers/diffusers-onnxruntime-cuda
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
-
+    
    steps:
    - name: Checkout diffusers
      uses: actions/checkout@v3
@@ -321,7 +321,7 @@ jobs:

    - name: Environment
      run: python utils/print_env.py
-
+    
    - name: Run nightly ONNXRuntime CUDA tests
      env:
        HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
@@ -329,7 +329,7 @@ jobs:
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
          -s -v -k "Onnx" \
          --make-reports=tests_onnx_cuda \
-          --report-log=tests_onnx_cuda.log \
+          --report-log=tests_onnx_cuda.log \
          tests/

    - name: Failure short reports
@@ -344,7 +344,7 @@ jobs:
      with:
        name: ${{ matrix.config.report }}_test_reports
        path: reports
-
+    
    - name: Generate Report and Notify Channel
      if: always()
      run: |
@@ -21,7 +21,7 @@ env:
 jobs:
  setup_torch_cuda_pipeline_matrix:
    name: Setup Torch Pipelines CUDA Slow Tests Matrix
-    runs-on: diffusers/diffusers-pytorch-cpu
+    runs-on: ubuntu-latest
    outputs:
      pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
    steps:
@@ -1304,11 +1304,7 @@ class DemoFusionSDXLPipeline(
            if isinstance(component, torch.nn.Module):
                if hasattr(component, "_hf_hook"):
                    is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
-                    is_sequential_cpu_offload = (
-                        isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
-                        or hasattr(component._hf_hook, "hooks")
-                        and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
-                    )
+                    is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
                    logger.info(
                        "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
                    )
@@ -369,11 +369,7 @@ class LoraLoaderMixin:
                    if not is_model_cpu_offload:
                        is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
                    if not is_sequential_cpu_offload:
-                        is_sequential_cpu_offload = (
-                            isinstance(component._hf_hook, AlignDevicesHook)
-                            or hasattr(component._hf_hook, "hooks")
-                            and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
-                        )
+                        is_sequential_cpu_offload = isinstance(component._hf_hook, AlignDevicesHook)

                    logger.info(
                        "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
@@ -423,11 +423,7 @@ class TextualInversionLoaderMixin:
            if isinstance(component, nn.Module):
                if hasattr(component, "_hf_hook"):
                    is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
-                    is_sequential_cpu_offload = (
-                        isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
-                        or hasattr(component._hf_hook, "hooks")
-                        and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
-                    )
+                    is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
                    logger.info(
                        "Accelerate hooks detected. Since you have called `load_textual_inversion()`, the previous hooks will be first removed. Then the textual inversion parameters will be loaded and the hooks will be applied again."
                    )
@@ -359,11 +359,7 @@ class UNet2DConditionLoadersMixin:
                for _, component in _pipeline.components.items():
                    if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
                        is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
-                        is_sequential_cpu_offload = (
-                            isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
-                            or hasattr(component._hf_hook, "hooks")
-                            and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
-                        )
+                        is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)

                        logger.info(
                            "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
@@ -227,9 +227,6 @@ class DiTPipeline(DiffusionPipeline):
        if output_type == "pil":
            samples = self.numpy_to_pil(samples)

-        # Offload all models
-        self.maybe_free_model_hooks()
-
        if not return_dict:
            return (samples,)

@@ -376,11 +376,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
            if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
                return False

-            return hasattr(module, "_hf_hook") and (
-                isinstance(module._hf_hook, accelerate.hooks.AlignDevicesHook)
-                or hasattr(module._hf_hook, "hooks")
-                and isinstance(module._hf_hook.hooks[0], accelerate.hooks.AlignDevicesHook)
-            )
+            return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.AlignDevicesHook)

        def module_is_offloaded(module):
            if not is_accelerate_available() or is_accelerate_version("<", "0.17.0.dev0"):
@@ -1009,7 +1005,8 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
        """
        for _, model in self.components.items():
            if isinstance(model, torch.nn.Module) and hasattr(model, "_hf_hook"):
-                accelerate.hooks.remove_hook_from_module(model, recurse=True)
+                is_sequential_cpu_offload = isinstance(getattr(model, "_hf_hook"), accelerate.hooks.AlignDevicesHook)
+                accelerate.hooks.remove_hook_from_module(model, recurse=is_sequential_cpu_offload)
        self._all_hooks = []

    def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
@@ -172,7 +172,6 @@ class StableDiffusionInstructPix2PixPipeline(
        prompt_embeds: Optional[torch.FloatTensor] = None,
        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
        ip_adapter_image: Optional[PipelineImageInput] = None,
-        ip_adapter_image_embeds: Optional[List[torch.FloatTensor]] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
@@ -297,8 +296,6 @@ class StableDiffusionInstructPix2PixPipeline(
            negative_prompt,
            prompt_embeds,
            negative_prompt_embeds,
-            ip_adapter_image,
-            ip_adapter_image_embeds,
            callback_on_step_end_tensor_inputs,
        )
        self._guidance_scale = guidance_scale
@@ -306,6 +303,14 @@ class StableDiffusionInstructPix2PixPipeline(

        device = self._execution_device

+        if ip_adapter_image is not None:
+            output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
+            image_embeds, negative_image_embeds = self.encode_image(
+                ip_adapter_image, device, num_images_per_prompt, output_hidden_state
+            )
+            if self.do_classifier_free_guidance:
+                image_embeds = torch.cat([image_embeds, negative_image_embeds, negative_image_embeds])
+
        if image is None:
            raise ValueError("`image` input cannot be undefined.")

@@ -330,14 +335,6 @@ class StableDiffusionInstructPix2PixPipeline(
            negative_prompt_embeds=negative_prompt_embeds,
        )

-        if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
-            image_embeds = self.prepare_ip_adapter_image_embeds(
-                ip_adapter_image,
-                ip_adapter_image_embeds,
-                device,
-                batch_size * num_images_per_prompt,
-                self.do_classifier_free_guidance,
-            )
        # 3. Preprocess image
        image = self.image_processor.preprocess(image)

@@ -638,65 +635,6 @@ class StableDiffusionInstructPix2PixPipeline(

            return image_embeds, uncond_image_embeds

-    def prepare_ip_adapter_image_embeds(
-        self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
-    ):
-        if ip_adapter_image_embeds is None:
-            if not isinstance(ip_adapter_image, list):
-                ip_adapter_image = [ip_adapter_image]
-
-            if len(ip_adapter_image) != len(self.unet.encoder_hid_proj.image_projection_layers):
-                raise ValueError(
-                    f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
-                )
-
-            image_embeds = []
-            for single_ip_adapter_image, image_proj_layer in zip(
-                ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
-            ):
-                output_hidden_state = not isinstance(image_proj_layer, ImageProjection)
-                single_image_embeds, single_negative_image_embeds = self.encode_image(
-                    single_ip_adapter_image, device, 1, output_hidden_state
-                )
-                single_image_embeds = torch.stack([single_image_embeds] * num_images_per_prompt, dim=0)
-                single_negative_image_embeds = torch.stack(
-                    [single_negative_image_embeds] * num_images_per_prompt, dim=0
-                )
-
-                if do_classifier_free_guidance:
-                    single_image_embeds = torch.cat(
-                        [single_image_embeds, single_negative_image_embeds, single_negative_image_embeds]
-                    )
-                    single_image_embeds = single_image_embeds.to(device)
-
-                image_embeds.append(single_image_embeds)
-        else:
-            repeat_dims = [1]
-            image_embeds = []
-            for single_image_embeds in ip_adapter_image_embeds:
-                if do_classifier_free_guidance:
-                    (
-                        single_image_embeds,
-                        single_negative_image_embeds,
-                        single_negative_image_embeds,
-                    ) = single_image_embeds.chunk(3)
-                    single_image_embeds = single_image_embeds.repeat(
-                        num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
-                    )
-                    single_negative_image_embeds = single_negative_image_embeds.repeat(
-                        num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
-                    )
-                    single_image_embeds = torch.cat(
-                        [single_image_embeds, single_negative_image_embeds, single_negative_image_embeds]
-                    )
-                else:
-                    single_image_embeds = single_image_embeds.repeat(
-                        num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
-                    )
-                image_embeds.append(single_image_embeds)
-
-        return image_embeds
-
    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker
    def run_safety_checker(self, image, device, dtype):
        if self.safety_checker is None:
@@ -749,8 +687,6 @@ class StableDiffusionInstructPix2PixPipeline(
        negative_prompt=None,
        prompt_embeds=None,
        negative_prompt_embeds=None,
-        ip_adapter_image=None,
-        ip_adapter_image_embeds=None,
        callback_on_step_end_tensor_inputs=None,
    ):
        if callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0):
@@ -792,21 +728,6 @@ class StableDiffusionInstructPix2PixPipeline(
                    f" {negative_prompt_embeds.shape}."
                )

-        if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
-            raise ValueError(
-                "Provide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image` and `ip_adapter_image_embeds` defined."
-            )
-
-        if ip_adapter_image_embeds is not None:
-            if not isinstance(ip_adapter_image_embeds, list):
-                raise ValueError(
-                    f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
-                )
-            elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
-                raise ValueError(
-                    f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
-                )
-
    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
    def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
        shape = (
@@ -436,6 +436,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
            extra_step_kwargs["generator"] = generator
        return extra_step_kwargs

+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_instruct_pix2pix.StableDiffusionInstructPix2PixPipeline.check_inputs
    def check_inputs(
        self,
        prompt,
@@ -324,6 +324,10 @@ class PixArtAlphaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    def test_inference_batch_single_identical(self):
        self._test_inference_batch_single_identical(expected_max_diff=1e-3)

+    # PixArt transformer model does not work with sequential offload so skip it for now
+    def test_sequential_offload_forward_pass_twice(self):
+        pass
+

@slow
@require_torch_gpu
@@ -308,6 +308,10 @@ class PixArtSigmaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    def test_inference_batch_single_identical(self):
        self._test_inference_batch_single_identical(expected_max_diff=1e-3)

+    # PixArt transformer model does not work with sequential offload so skip it for now
+    def test_sequential_offload_forward_pass_twice(self):
+        pass
+

@slow
@require_torch_gpu
@@ -1257,8 +1257,8 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):

    def test_download_from_hub(self):
        ckpt_paths = [
-            "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors",
-            "https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix.safetensors",
+            "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt",
+            "https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix_base.ckpt",
        ]

        for ckpt_path in ckpt_paths:
@@ -1271,7 +1271,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
        assert image_out.shape == (512, 512, 3)

    def test_download_local(self):
-        ckpt_filename = hf_hub_download("runwayml/stable-diffusion-v1-5", filename="v1-5-pruned-emaonly.safetensors")
+        ckpt_filename = hf_hub_download("runwayml/stable-diffusion-v1-5", filename="v1-5-pruned-emaonly.ckpt")
        config_filename = hf_hub_download("runwayml/stable-diffusion-v1-5", filename="v1-inference.yaml")

        pipe = StableDiffusionPipeline.from_single_file(
@@ -1285,7 +1285,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
        assert image_out.shape == (512, 512, 3)

    def test_download_ckpt_diff_format_is_same(self):
-        ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
+        ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt"

        sf_pipe = StableDiffusionPipeline.from_single_file(ckpt_path)
        sf_pipe.scheduler = DDIMScheduler.from_config(sf_pipe.scheduler.config)
@@ -1310,7 +1310,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
    def test_single_file_component_configs(self):
        pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

-        ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors"
+        ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt"
        single_file_pipe = StableDiffusionPipeline.from_single_file(ckpt_path, load_safety_checker=True)

        for param_name, param_value in single_file_pipe.text_encoder.config.to_dict().items():
@@ -1360,8 +1360,6 @@ class PipelineTesterMixin:
        reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
    )
    def test_sequential_cpu_offload_forward_pass(self, expected_max_diff=1e-4):
-        import accelerate
-
        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
        for component in pipe.components.values():
@@ -1375,7 +1373,6 @@ class PipelineTesterMixin:
        output_without_offload = pipe(**inputs)[0]

        pipe.enable_sequential_cpu_offload()
-        assert pipe._execution_device.type == pipe._offload_device.type

        inputs = self.get_dummy_inputs(generator_device)
        output_with_offload = pipe(**inputs)[0]
@@ -1383,48 +1380,11 @@ class PipelineTesterMixin:
        max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
        self.assertLess(max_diff, expected_max_diff, "CPU offloading should not affect the inference results")

-        # make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
-        offloaded_modules = {
-            k: v
-            for k, v in pipe.components.items()
-            if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
-        }
-        # 1. all offloaded modules should be saved to cpu and moved to meta device
-        self.assertTrue(
-            all(v.device.type == "meta" for v in offloaded_modules.values()),
-            f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'meta']}",
-        )
-        # 2. all offloaded modules should have hook installed
-        self.assertTrue(
-            all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
-            f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
-        )
-        # 3. all offloaded modules should have correct hooks installed, should be either one of these two
-        #    - `AlignDevicesHook`
-        #    - a SequentialHook` that contains `AlignDevicesHook`
-        offloaded_modules_with_incorrect_hooks = {}
-        for k, v in offloaded_modules.items():
-            if hasattr(v, "_hf_hook"):
-                if isinstance(v._hf_hook, accelerate.hooks.SequentialHook):
-                    # if it is a `SequentialHook`, we loop through its `hooks` attribute to check if it only contains `AlignDevicesHook`
-                    for hook in v._hf_hook.hooks:
-                        if not isinstance(hook, accelerate.hooks.AlignDevicesHook):
-                            offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook.hooks[0])
-                elif not isinstance(v._hf_hook, accelerate.hooks.AlignDevicesHook):
-                    offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)
-
-        self.assertTrue(
-            len(offloaded_modules_with_incorrect_hooks) == 0,
-            f"Not installed correct hook: {offloaded_modules_with_incorrect_hooks}",
-        )
-
    @unittest.skipIf(
        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.17.0"),
        reason="CPU offload is only available with CUDA and `accelerate v0.17.0` or higher",
    )
    def test_model_cpu_offload_forward_pass(self, expected_max_diff=2e-4):
-        import accelerate
-
        generator_device = "cpu"
        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
@@ -1440,39 +1400,19 @@ class PipelineTesterMixin:
        output_without_offload = pipe(**inputs)[0]

        pipe.enable_model_cpu_offload()
-        assert pipe._execution_device.type == pipe._offload_device.type
-
        inputs = self.get_dummy_inputs(generator_device)
        output_with_offload = pipe(**inputs)[0]

        max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
        self.assertLess(max_diff, expected_max_diff, "CPU offloading should not affect the inference results")
-
-        # make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
-        offloaded_modules = {
-            k: v
+        offloaded_modules = [
+            v
            for k, v in pipe.components.items()
            if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
-        }
-        # 1. check if all offloaded modules are saved to cpu
-        self.assertTrue(
-            all(v.device.type == "cpu" for v in offloaded_modules.values()),
-            f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'cpu']}",
-        )
-        # 2. check if all offloaded modules have hooks installed
-        self.assertTrue(
-            all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
-            f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
-        )
-        # 3. check if all offloaded modules have correct type of hooks installed, should be `CpuOffload`
-        offloaded_modules_with_incorrect_hooks = {}
-        for k, v in offloaded_modules.items():
-            if hasattr(v, "_hf_hook") and not isinstance(v._hf_hook, accelerate.hooks.CpuOffload):
-                offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)
-
-        self.assertTrue(
-            len(offloaded_modules_with_incorrect_hooks) == 0,
-            f"Not installed correct hook: {offloaded_modules_with_incorrect_hooks}",
+        ]
+        self.assertTrue(  # the message must be assertTrue's 2nd argument, not a sibling tuple element
+            all(v.device.type == "cpu" for v in offloaded_modules),
+            f"Not offloaded: {[v for v in offloaded_modules if v.device.type != 'cpu']}",
        )

    @unittest.skipIf(
@@ -1504,24 +1444,16 @@ class PipelineTesterMixin:
        self.assertLess(
            max_diff, expected_max_diff, "running CPU offloading 2nd time should not affect the inference results"
        )
-
-        # make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
        offloaded_modules = {
            k: v
            for k, v in pipe.components.items()
            if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
        }
-        # 1. check if all offloaded modules are saved to cpu
        self.assertTrue(
            all(v.device.type == "cpu" for v in offloaded_modules.values()),
            f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'cpu']}",
        )
-        # 2. check if all offloaded modules have hooks installed
-        self.assertTrue(
-            all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
-            f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
-        )
-        # 3. check if all offloaded modules have correct type of hooks installed, should be `CpuOffload`
+
        offloaded_modules_with_incorrect_hooks = {}
        for k, v in offloaded_modules.items():
            if hasattr(v, "_hf_hook") and not isinstance(v._hf_hook, accelerate.hooks.CpuOffload):
@@ -1561,36 +1493,19 @@ class PipelineTesterMixin:
        self.assertLess(
            max_diff, expected_max_diff, "running sequential offloading second time should have the inference results"
        )
-
-        # make sure all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are offloaded correctly
        offloaded_modules = {
            k: v
            for k, v in pipe.components.items()
            if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload
        }
-        # 1. check if all offloaded modules are moved to meta device
        self.assertTrue(
            all(v.device.type == "meta" for v in offloaded_modules.values()),
            f"Not offloaded: {[k for k, v in offloaded_modules.items() if v.device.type != 'meta']}",
        )
-        # 2. check if all offloaded modules have hook installed
-        self.assertTrue(
-            all(hasattr(v, "_hf_hook") for k, v in offloaded_modules.items()),
-            f"No hook attached: {[k for k, v in offloaded_modules.items() if not hasattr(v, '_hf_hook')]}",
-        )
-        # 3. check if all offloaded modules have correct hooks installed, should be either one of these two
-        #    - `AlignDevicesHook`
-        #    - a SequentialHook` that contains `AlignDevicesHook`
        offloaded_modules_with_incorrect_hooks = {}
        for k, v in offloaded_modules.items():
-            if hasattr(v, "_hf_hook"):
-                if isinstance(v._hf_hook, accelerate.hooks.SequentialHook):
-                    # if it is a `SequentialHook`, we loop through its `hooks` attribute to check if it only contains `AlignDevicesHook`
-                    for hook in v._hf_hook.hooks:
-                        if not isinstance(hook, accelerate.hooks.AlignDevicesHook):
-                            offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook.hooks[0])
-                elif not isinstance(v._hf_hook, accelerate.hooks.AlignDevicesHook):
-                    offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)
+            if hasattr(v, "_hf_hook") and not isinstance(v._hf_hook, accelerate.hooks.AlignDevicesHook):
+                offloaded_modules_with_incorrect_hooks[k] = type(v._hf_hook)

        self.assertTrue(
            len(offloaded_modules_with_incorrect_hooks) == 0,
Author	SHA1	Message	Date
Sayak Paul	a75bc61b71	Merge branch 'main' into fix-model-device-map	2024-05-01 08:16:12 +05:30
sayakpaul	8de27500da	Empty-Commit	2024-04-30 20:28:42 +05:30
sayakpaul	5575303be0	remove patch file	2024-04-30 20:06:24 +05:30
sayakpaul	235ff8808f	fix: device module tests	2024-04-30 19:49:16 +05:30