Merge branch 'main' into cache-docs-fixes

Update docs/source/en/optimization/cache.md
2025-11-26 15:22:39 +05:30 · 2025-11-20 10:06:19 +05:30 · 2025-11-20 10:00:46 +05:30 · 2025-11-19 08:40:28 +05:30
4 changed files with 16 additions and 22 deletions
@@ -29,7 +29,7 @@ Cache methods speed up diffusion transformers by storing and reusing intermediate

 [[autodoc]] apply_faster_cache

-### FirstBlockCacheConfig
+## FirstBlockCacheConfig

 [[autodoc]] FirstBlockCacheConfig

@@ -66,4 +66,8 @@ config = FasterCacheConfig(
    tensor_format="BFCHW",
 )
 pipeline.transformer.enable_cache(config)
-```
+```
+
+## FirstBlockCache
+
+[FirstBlock Cache](https://huggingface.co/docs/diffusers/main/en/api/cache#diffusers.FirstBlockCacheConfig) builds on the ideas of [TeaCache](https://huggingface.co/papers/2411.19108). It is much simpler to implement generically for a wide range of models and has been integrated first for experimental purposes.
@@ -41,9 +41,11 @@ class CacheMixin:
        Enable caching techniques on the model.

        Args:
-            config (`Union[PyramidAttentionBroadcastConfig]`):
+            config (`Union[PyramidAttentionBroadcastConfig, FasterCacheConfig, FirstBlockCacheConfig]`):
                The configuration for applying the caching technique. Currently supported caching techniques are:
                    - [`~hooks.PyramidAttentionBroadcastConfig`]
+                    - [`~hooks.FasterCacheConfig`]
+                    - [`~hooks.FirstBlockCacheConfig`]

        Example:

@@ -160,10 +160,7 @@ class AutoOffloadStrategy:
        if len(hooks) == 0:
            return []

-        try:
-            current_module_size = model.get_memory_footprint()
-        except AttributeError:
-            raise AttributeError(f"Do not know how to compute memory footprint of `{model.__class__.__name__}.")
+        current_module_size = model.get_memory_footprint()

        device_type = execution_device.type
        device_module = getattr(torch, device_type, torch.cuda)
@@ -706,20 +703,7 @@ class ComponentsManager:
        if not is_accelerate_available():
            raise ImportError("Make sure to install accelerate to use auto_cpu_offload")

-        if device is None:
-            device = get_device()
-        if not isinstance(device, torch.device):
-            device = torch.device(device)
-
-        device_type = device.type
-        device_module = getattr(torch, device_type, torch.cuda)
-        if not hasattr(device_module, "mem_get_info"):
-            raise NotImplementedError(
-                f"`enable_auto_cpu_offload() relies on the `mem_get_info()` method. It's not implemented for {str(device.type)}."
-            )
-
-        if device.index is None:
-            device = torch.device(f"{device.type}:{0}")
+        # TODO: add a warning if mem_get_info isn't available on `device`.

        for name, component in self.components.items():
            if isinstance(component, torch.nn.Module) and hasattr(component, "_hf_hook"):
@@ -727,7 +711,11 @@ class ComponentsManager:

        self.disable_auto_cpu_offload()
        offload_strategy = AutoOffloadStrategy(memory_reserve_margin=memory_reserve_margin)
-
+        if device is None:
+            device = get_device()
+        device = torch.device(device)
+        if device.index is None:
+            device = torch.device(f"{device.type}:{0}")
        all_hooks = []
        for name, component in self.components.items():
            if isinstance(component, torch.nn.Module):
Author	SHA1	Message	Date
Sayak Paul	d76b744ac3	Merge branch 'main' into cache-docs-fixes	2025-11-26 15:22:39 +05:30
Sayak Paul	b26867b628	Merge branch 'main' into cache-docs-fixes	2025-11-20 10:06:19 +05:30
Sayak Paul	e3f441648c	Update docs/source/en/optimization/cache.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>	2025-11-20 10:00:46 +05:30
sayakpaul	c6cfc5ce1d	polish caching docs.	2025-11-19 08:40:28 +05:30