Compare commits

..

17 Commits

Author SHA1 Message Date
Dhruv Nair 20d69cb7e0 update 2025-04-01 05:16:54 +02:00
Dhruv Nair 2931139588 update 2025-04-01 05:05:29 +02:00
Dhruv Nair 12ce0d0bca update 2025-04-01 04:25:52 +02:00
Dhruv Nair 13ac0b832a update 2025-03-31 12:37:36 +02:00
Dhruv Nair 150ee4db8e Merge branch 'main' into wan-v2v 2025-03-31 11:58:29 +02:00
Dhruv Nair a23c9812ba update 2025-03-24 18:01:06 +01:00
Dhruv Nair 8ddbcad714 update 2025-03-14 12:33:53 +01:00
Dhruv Nair 6d0d68aee3 update 2025-03-14 12:19:38 +01:00
Dhruv Nair 598ca27dca update 2025-03-14 11:20:57 +01:00
DN6 aa2c37a86a update 2025-03-13 22:10:42 +05:30
DN6 558de5cba3 update 2025-03-13 22:05:53 +05:30
DN6 89e956b277 update 2025-03-13 21:53:43 +05:30
DN6 515fe28897 update 2025-03-12 07:50:47 +05:30
DN6 09f9acb473 update 2025-03-12 07:50:35 +05:30
DN6 bfdf495507 update 2025-03-11 17:09:46 +05:30
DN6 8639a1735c update 2025-03-11 17:07:31 +05:30
DN6 c9971ebcfb update 2025-03-11 16:10:07 +05:30
8 changed files with 15 additions and 57 deletions
+5 -7
View File
@@ -161,10 +161,10 @@ Your Python environment will find the `main` version of 🤗 Diffusers on the ne
Model weights and files are downloaded from the Hub to a cache which is usually your home directory. You can change the cache location by specifying the `HF_HOME` or `HUGGINFACE_HUB_CACHE` environment variables or configuring the `cache_dir` parameter in methods like [`~DiffusionPipeline.from_pretrained`].
Cached files allow you to run 🤗 Diffusers offline. To prevent 🤗 Diffusers from connecting to the internet, set the `HF_HUB_OFFLINE` environment variable to `1` and 🤗 Diffusers will only load previously downloaded files in the cache.
Cached files allow you to run 🤗 Diffusers offline. To prevent 🤗 Diffusers from connecting to the internet, set the `HF_HUB_OFFLINE` environment variable to `True` and 🤗 Diffusers will only load previously downloaded files in the cache.
```shell
export HF_HUB_OFFLINE=1
export HF_HUB_OFFLINE=True
```
For more details about managing and cleaning the cache, take a look at the [caching](https://huggingface.co/docs/huggingface_hub/guides/manage-cache) guide.
@@ -179,16 +179,14 @@ Telemetry is only sent when loading models and pipelines from the Hub,
and it is not collected if you're loading local files.
We understand that not everyone wants to share additional information,and we respect your privacy.
You can disable telemetry collection by setting the `HF_HUB_DISABLE_TELEMETRY` environment variable from your terminal:
You can disable telemetry collection by setting the `DISABLE_TELEMETRY` environment variable from your terminal:
On Linux/MacOS:
```bash
export HF_HUB_DISABLE_TELEMETRY=1
export DISABLE_TELEMETRY=YES
```
On Windows:
```bash
set HF_HUB_DISABLE_TELEMETRY=1
set DISABLE_TELEMETRY=YES
```
+2 -17
View File
@@ -1773,7 +1773,7 @@ class SDXLLongPromptWeightingPipeline(
f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
f" `num_channels_mask`: {num_channels_mask} + `num_channels_masked_image`: {num_channels_masked_image}"
f" = {num_channels_latents + num_channels_masked_image + num_channels_mask}. Please verify the config of"
f" = {num_channels_latents+num_channels_masked_image+num_channels_mask}. Please verify the config of"
" `pipeline.unet` or your `mask_image` or `image` input."
)
elif num_channels_unet != 4:
@@ -1924,22 +1924,7 @@ class SDXLLongPromptWeightingPipeline(
self.upcast_vae()
latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
# unscale/denormalize the latents
# denormalize with the mean and std if available and not None
has_latents_mean = hasattr(self.vae.config, "latents_mean") and self.vae.config.latents_mean is not None
has_latents_std = hasattr(self.vae.config, "latents_std") and self.vae.config.latents_std is not None
if has_latents_mean and has_latents_std:
latents_mean = (
torch.tensor(self.vae.config.latents_mean).view(1, 4, 1, 1).to(latents.device, latents.dtype)
)
latents_std = (
torch.tensor(self.vae.config.latents_std).view(1, 4, 1, 1).to(latents.device, latents.dtype)
)
latents = latents * latents_std / self.vae.config.scaling_factor + latents_mean
else:
latents = latents / self.vae.config.scaling_factor
image = self.vae.decode(latents, return_dict=False)[0]
image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
# cast back to fp16 if needed
if needs_upcasting:
@@ -26,7 +26,6 @@ from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_
from ...utils.torch_utils import maybe_allow_in_graph
from ..attention import FeedForward
from ..attention_processor import Attention
from ..cache_utils import CacheMixin
from ..embeddings import PixArtAlphaTextProjection
from ..modeling_outputs import Transformer2DModelOutput
from ..modeling_utils import ModelMixin
@@ -299,7 +298,7 @@ class LTXVideoTransformerBlock(nn.Module):
@maybe_allow_in_graph
class LTXVideoTransformer3DModel(ModelMixin, ConfigMixin, FromOriginalModelMixin, PeftAdapterMixin, CacheMixin):
class LTXVideoTransformer3DModel(ModelMixin, ConfigMixin, FromOriginalModelMixin, PeftAdapterMixin):
r"""
A Transformer model for video-like data used in [LTX](https://huggingface.co/Lightricks/LTX-Video).
@@ -24,7 +24,6 @@ from ...loaders import FromOriginalModelMixin, PeftAdapterMixin
from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
from ..attention import FeedForward
from ..attention_processor import Attention
from ..cache_utils import CacheMixin
from ..embeddings import PixArtAlphaTextProjection, TimestepEmbedding, Timesteps, get_1d_rotary_pos_embed
from ..modeling_outputs import Transformer2DModelOutput
from ..modeling_utils import ModelMixin
@@ -289,7 +288,7 @@ class WanTransformerBlock(nn.Module):
return hidden_states
class WanTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin):
class WanTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin):
r"""
A Transformer model for video-like data used in the Wan model.
@@ -489,10 +489,6 @@ class LTXPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLoraLoaderMixi
def num_timesteps(self):
return self._num_timesteps
@property
def current_timestep(self):
return self._current_timestep
@property
def attention_kwargs(self):
return self._attention_kwargs
@@ -626,7 +622,6 @@ class LTXPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLoraLoaderMixi
self._guidance_scale = guidance_scale
self._attention_kwargs = attention_kwargs
self._interrupt = False
self._current_timestep = None
# 2. Define call parameters
if prompt is not None and isinstance(prompt, str):
@@ -711,8 +706,6 @@ class LTXPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLoraLoaderMixi
if self.interrupt:
continue
self._current_timestep = t
latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
latent_model_input = latent_model_input.to(prompt_embeds.dtype)
@@ -774,10 +774,6 @@ class LTXConditionPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLoraL
def num_timesteps(self):
return self._num_timesteps
@property
def current_timestep(self):
return self._current_timestep
@property
def attention_kwargs(self):
return self._attention_kwargs
@@ -937,7 +933,6 @@ class LTXConditionPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLoraL
self._guidance_scale = guidance_scale
self._attention_kwargs = attention_kwargs
self._interrupt = False
self._current_timestep = None
# 2. Define call parameters
if prompt is not None and isinstance(prompt, str):
@@ -1071,8 +1066,6 @@ class LTXConditionPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLoraL
if self.interrupt:
continue
self._current_timestep = t
if image_cond_noise_scale > 0:
# Add timestep-dependent noise to the hard-conditioning latents
# This helps with motion continuity, especially when conditioned on a single frame
@@ -487,21 +487,19 @@ class LTXImageToVideoPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLo
) -> torch.Tensor:
height = height // self.vae_spatial_compression_ratio
width = width // self.vae_spatial_compression_ratio
num_frames = (num_frames - 1) // self.vae_temporal_compression_ratio + 1
num_frames = (
(num_frames - 1) // self.vae_temporal_compression_ratio + 1 if latents is None else latents.size(2)
)
shape = (batch_size, num_channels_latents, num_frames, height, width)
mask_shape = (batch_size, 1, num_frames, height, width)
if latents is not None:
conditioning_mask = latents.new_zeros(mask_shape)
conditioning_mask = latents.new_zeros(shape)
conditioning_mask[:, :, 0] = 1.0
conditioning_mask = self._pack_latents(
conditioning_mask, self.transformer_spatial_patch_size, self.transformer_temporal_patch_size
).squeeze(-1)
if latents.ndim != 3 or latents.shape[:2] != conditioning_mask.shape:
raise ValueError(
f"Provided `latents` tensor has shape {latents.shape}, but the expected shape is {conditioning_mask.shape + (num_channels_latents,)}."
)
)
return latents.to(device=device, dtype=dtype), conditioning_mask
if isinstance(generator, list):
@@ -550,10 +548,6 @@ class LTXImageToVideoPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLo
def num_timesteps(self):
return self._num_timesteps
@property
def current_timestep(self):
return self._current_timestep
@property
def attention_kwargs(self):
return self._attention_kwargs
@@ -690,7 +684,6 @@ class LTXImageToVideoPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLo
self._guidance_scale = guidance_scale
self._attention_kwargs = attention_kwargs
self._interrupt = False
self._current_timestep = None
# 2. Define call parameters
if prompt is not None and isinstance(prompt, str):
@@ -783,8 +776,6 @@ class LTXImageToVideoPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLo
if self.interrupt:
continue
self._current_timestep = t
latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
latent_model_input = latent_model_input.to(prompt_embeds.dtype)
+1 -1
View File
@@ -315,7 +315,7 @@ class BnB8bitBasicTests(Base8bitTests):
_ = self.model_fp16.float()
# Check that this does not throw an error
_ = self.model_fp16.to(torch_device)
_ = self.model_fp16.cuda()
class Bnb8bitDeviceTests(Base8bitTests):