Compare commits

..

4 Commits

Author SHA1 Message Date
Sayak Paul d76b744ac3 Merge branch 'main' into cache-docs-fixes 2025-11-26 15:22:39 +05:30
Sayak Paul b26867b628 Merge branch 'main' into cache-docs-fixes 2025-11-20 10:06:19 +05:30
Sayak Paul e3f441648c Update docs/source/en/optimization/cache.md
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-11-20 10:00:46 +05:30
sayakpaul c6cfc5ce1d polish caching docs. 2025-11-19 08:40:28 +05:30
4 changed files with 16 additions and 22 deletions
+1 -1
View File
@@ -29,7 +29,7 @@ Cache methods speed up diffusion transformers by storing and reusing intermediate
[[autodoc]] apply_faster_cache
### FirstBlockCacheConfig
## FirstBlockCacheConfig
[[autodoc]] FirstBlockCacheConfig
+5 -1
View File
@@ -66,4 +66,8 @@ config = FasterCacheConfig(
tensor_format="BFCHW",
)
pipeline.transformer.enable_cache(config)
```
```
## FirstBlockCache
[FirstBlockCache](https://huggingface.co/docs/diffusers/main/en/api/cache#diffusers.FirstBlockCacheConfig) builds on the ideas of [TeaCache](https://huggingface.co/papers/2411.19108). It is much simpler to implement generically across a wide range of models and was integrated first, for experimental purposes.
+3 -1
View File
@@ -41,9 +41,11 @@ class CacheMixin:
Enable caching techniques on the model.
Args:
config (`Union[PyramidAttentionBroadcastConfig]`):
config (`Union[PyramidAttentionBroadcastConfig, FasterCacheConfig, FirstBlockCacheConfig]`):
The configuration for applying the caching technique. Currently supported caching techniques are:
- [`~hooks.PyramidAttentionBroadcastConfig`]
- [`~hooks.FasterCacheConfig`]
- [`~hooks.FirstBlockCacheConfig`]
Example:
@@ -160,10 +160,7 @@ class AutoOffloadStrategy:
if len(hooks) == 0:
return []
try:
current_module_size = model.get_memory_footprint()
except AttributeError:
raise AttributeError(f"Do not know how to compute memory footprint of `{model.__class__.__name__}.")
current_module_size = model.get_memory_footprint()
device_type = execution_device.type
device_module = getattr(torch, device_type, torch.cuda)
@@ -706,20 +703,7 @@ class ComponentsManager:
if not is_accelerate_available():
raise ImportError("Make sure to install accelerate to use auto_cpu_offload")
if device is None:
device = get_device()
if not isinstance(device, torch.device):
device = torch.device(device)
device_type = device.type
device_module = getattr(torch, device_type, torch.cuda)
if not hasattr(device_module, "mem_get_info"):
raise NotImplementedError(
f"`enable_auto_cpu_offload() relies on the `mem_get_info()` method. It's not implemented for {str(device.type)}."
)
if device.index is None:
device = torch.device(f"{device.type}:{0}")
# TODO: add a warning if mem_get_info isn't available on `device`.
for name, component in self.components.items():
if isinstance(component, torch.nn.Module) and hasattr(component, "_hf_hook"):
@@ -727,7 +711,11 @@ class ComponentsManager:
self.disable_auto_cpu_offload()
offload_strategy = AutoOffloadStrategy(memory_reserve_margin=memory_reserve_margin)
if device is None:
device = get_device()
device = torch.device(device)
if device.index is None:
device = torch.device(f"{device.type}:{0}")
all_hooks = []
for name, component in self.components.items():
if isinstance(component, torch.nn.Module):