Compare commits
14 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 92199ff3ac | |||
| 04e9323055 | |||
| 9a09162baf | |||
| 33a8a3be0c | |||
| 58743c3ee7 | |||
| 50c0b786d2 | |||
| f5c113e439 | |||
| 5e181eddfe | |||
| 55f0b3d758 | |||
| eb7ef26736 | |||
| e1b7f1f240 | |||
| 9e7ae568d6 | |||
| f7b79452b4 | |||
| 43459079ab |
@@ -50,7 +50,7 @@ from diffusers.utils import export_to_video
|
||||
pipeline_quant_config = PipelineQuantizationConfig(
|
||||
quant_backend="torchao",
|
||||
quant_kwargs={"quant_type": "int8wo"},
|
||||
components_to_quantize=["transformer"]
|
||||
components_to_quantize="transformer"
|
||||
)
|
||||
|
||||
# fp8 layerwise weight-casting
|
||||
|
||||
@@ -54,7 +54,7 @@ pipeline_quant_config = PipelineQuantizationConfig(
|
||||
"bnb_4bit_quant_type": "nf4",
|
||||
"bnb_4bit_compute_dtype": torch.bfloat16
|
||||
},
|
||||
components_to_quantize=["transformer"]
|
||||
components_to_quantize="transformer"
|
||||
)
|
||||
|
||||
pipeline = HunyuanVideoPipeline.from_pretrained(
|
||||
@@ -91,7 +91,7 @@ pipeline_quant_config = PipelineQuantizationConfig(
|
||||
"bnb_4bit_quant_type": "nf4",
|
||||
"bnb_4bit_compute_dtype": torch.bfloat16
|
||||
},
|
||||
components_to_quantize=["transformer"]
|
||||
components_to_quantize="transformer"
|
||||
)
|
||||
|
||||
pipeline = HunyuanVideoPipeline.from_pretrained(
|
||||
@@ -139,7 +139,7 @@ export_to_video(video, "output.mp4", fps=15)
|
||||
"bnb_4bit_quant_type": "nf4",
|
||||
"bnb_4bit_compute_dtype": torch.bfloat16
|
||||
},
|
||||
components_to_quantize=["transformer"]
|
||||
components_to_quantize="transformer"
|
||||
)
|
||||
|
||||
pipeline = HunyuanVideoPipeline.from_pretrained(
|
||||
|
||||
@@ -291,13 +291,53 @@ Group offloading moves groups of internal layers ([torch.nn.ModuleList](https://
|
||||
> [!WARNING]
|
||||
> Group offloading may not work with all models if the forward implementation contains weight-dependent device casting of inputs because it may clash with group offloading's device casting mechanism.
|
||||
|
||||
Call [`~ModelMixin.enable_group_offload`] to enable it for standard Diffusers model components that inherit from [`ModelMixin`]. For other model components that don't inherit from [`ModelMixin`], such as a generic [torch.nn.Module](https://pytorch.org/docs/stable/generated/torch.nn.Module.html), use [`~hooks.apply_group_offloading`] instead.
|
||||
|
||||
The `offload_type` parameter can be set to `block_level` or `leaf_level`.
|
||||
Enable group offloading by configuring the `offload_type` parameter to `block_level` or `leaf_level`.
|
||||
|
||||
- `block_level` offloads groups of layers based on the `num_blocks_per_group` parameter. For example, if `num_blocks_per_group=2` on a model with 40 layers, 2 layers are onloaded and offloaded at a time (20 total onloads/offloads). This drastically reduces memory requirements.
|
||||
- `leaf_level` offloads individual layers at the lowest level and is equivalent to [CPU offloading](#cpu-offloading). However, it can be made faster by using streams, so you don't have to give up inference speed.
|
||||
|
||||
Group offloading is supported for entire pipelines or individual models. Applying group offloading to the entire pipeline is the easiest option while selectively applying it to individual models gives users more flexibility to use different offloading techniques for different models.
|
||||
|
||||
<hfoptions id="group-offloading">
|
||||
<hfoption id="pipeline">
|
||||
|
||||
Call [`~DiffusionPipeline.enable_group_offload`] on a pipeline.
|
||||
|
||||
```py
|
||||
import torch
|
||||
from diffusers import CogVideoXPipeline
|
||||
from diffusers.hooks import apply_group_offloading
|
||||
from diffusers.utils import export_to_video
|
||||
|
||||
onload_device = torch.device("cuda")
|
||||
offload_device = torch.device("cpu")
|
||||
|
||||
pipeline = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16)
|
||||
pipeline.enable_group_offload(
|
||||
onload_device=onload_device,
|
||||
offload_device=offload_device,
|
||||
offload_type="leaf_level",
|
||||
use_stream=True
|
||||
)
|
||||
|
||||
prompt = (
|
||||
"A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. "
|
||||
"The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other "
|
||||
"pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, "
|
||||
"casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. "
|
||||
"The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical "
|
||||
"atmosphere of this unique musical performance."
|
||||
)
|
||||
video = pipeline(prompt=prompt, guidance_scale=6, num_inference_steps=50).frames[0]
|
||||
print(f"Max memory reserved: {torch.cuda.max_memory_allocated() / 1024**3:.2f} GB")
|
||||
export_to_video(video, "output.mp4", fps=8)
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="model">
|
||||
|
||||
Call [`~ModelMixin.enable_group_offload`] on standard Diffusers model components that inherit from [`ModelMixin`]. For other model components that don't inherit from [`ModelMixin`], such as a generic [torch.nn.Module](https://pytorch.org/docs/stable/generated/torch.nn.Module.html), use [`~hooks.apply_group_offloading`] instead.
|
||||
|
||||
```py
|
||||
import torch
|
||||
from diffusers import CogVideoXPipeline
|
||||
@@ -328,6 +368,9 @@ print(f"Max memory reserved: {torch.cuda.max_memory_allocated() / 1024**3:.2f} G
|
||||
export_to_video(video, "output.mp4", fps=8)
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
#### CUDA stream
|
||||
|
||||
The `use_stream` parameter can be activated for CUDA devices that support asynchronous data transfer streams to reduce overall execution time compared to [CPU offloading](#cpu-offloading). It overlaps data transfer and computation by using layer prefetching. The next layer to be executed is loaded onto the GPU while the current layer is still being executed. It can increase CPU memory usage significantly, so ensure you have twice as much memory available as the model size.
|
||||
|
||||
@@ -34,7 +34,9 @@ Initialize [`~quantizers.PipelineQuantizationConfig`] with the following paramet
|
||||
> [!TIP]
|
||||
> These `quant_kwargs` arguments are different for each backend. Refer to the [Quantization API](../api/quantization) docs to view the arguments for each backend.
|
||||
|
||||
- `components_to_quantize` specifies which components of the pipeline to quantize. Typically, you should quantize the most compute intensive components like the transformer. The text encoder is another component to consider quantizing if a pipeline has more than one such as [`FluxPipeline`]. The example below quantizes the T5 text encoder in [`FluxPipeline`] while keeping the CLIP model intact.
|
||||
- `components_to_quantize` specifies which component(s) of the pipeline to quantize. Typically, you should quantize the most compute intensive components like the transformer. If a pipeline has more than one text encoder, such as [`FluxPipeline`], the text encoder is another component to consider quantizing. The example below quantizes the T5 text encoder in [`FluxPipeline`] while keeping the CLIP model intact.
|
||||
|
||||
`components_to_quantize` accepts either a list for multiple models or a string for a single model.
|
||||
|
||||
The example below loads the bitsandbytes backend with the following arguments from [`~quantizers.quantization_config.BitsAndBytesConfig`], `load_in_4bit`, `bnb_4bit_quant_type`, and `bnb_4bit_compute_dtype`.
|
||||
|
||||
@@ -62,6 +64,7 @@ pipe = DiffusionPipeline.from_pretrained(
|
||||
image = pipe("photo of a cute dog").images[0]
|
||||
```
|
||||
|
||||
|
||||
### Advanced quantization
|
||||
|
||||
The `quant_mapping` argument provides more options for how to quantize each individual component in a pipeline, like combining different quantization backends.
|
||||
|
||||
@@ -98,7 +98,7 @@ pipeline_quant_config = PipelineQuantizationConfig(
|
||||
"bnb_4bit_quant_type": "nf4",
|
||||
"bnb_4bit_compute_dtype": torch.bfloat16
|
||||
},
|
||||
components_to_quantize=["transformer"]
|
||||
components_to_quantize="transformer"
|
||||
)
|
||||
|
||||
pipeline = HunyuanVideoPipeline.from_pretrained(
|
||||
|
||||
@@ -1705,6 +1705,12 @@ class FaithDiffStableDiffusionXLPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
self.unet.denoise_encoder.enable_tiling()
|
||||
|
||||
@@ -1713,6 +1719,12 @@ class FaithDiffStableDiffusionXLPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
self.unet.denoise_encoder.disable_tiling()
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
|
||||
from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from diffusers.utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
@@ -643,6 +644,12 @@ class FluxKontextPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_tiling
|
||||
@@ -651,6 +658,12 @@ class FluxKontextPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def preprocess_image(self, image: PipelineImageInput, _auto_resize: bool, multiple_of: int) -> torch.Tensor:
|
||||
|
||||
@@ -30,6 +30,7 @@ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
|
||||
from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from diffusers.utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
@@ -526,6 +527,12 @@ class RFInversionFluxPipeline(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -533,6 +540,12 @@ class RFInversionFluxPipeline(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -541,6 +554,12 @@ class RFInversionFluxPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -548,6 +567,12 @@ class RFInversionFluxPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def prepare_latents_inversion(
|
||||
|
||||
@@ -35,6 +35,7 @@ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
|
||||
from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from diffusers.utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
@@ -702,6 +703,12 @@ class FluxSemanticGuidancePipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_tiling
|
||||
@@ -710,6 +717,12 @@ class FluxSemanticGuidancePipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.prepare_latents
|
||||
|
||||
@@ -28,6 +28,7 @@ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
|
||||
from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from diffusers.utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
@@ -503,6 +504,12 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -510,6 +517,12 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -518,6 +531,12 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -525,6 +544,12 @@ class FluxCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFileMixi
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def prepare_latents(
|
||||
|
||||
@@ -29,11 +29,7 @@ from diffusers.models.transformers import SD3Transformer2DModel
|
||||
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
|
||||
from diffusers.pipelines.stable_diffusion_3.pipeline_output import StableDiffusion3PipelineOutput
|
||||
from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from diffusers.utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from diffusers.utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from diffusers.utils.torch_utils import randn_tensor
|
||||
|
||||
|
||||
|
||||
@@ -504,6 +504,12 @@ class StableDiffusionBoxDiffPipeline(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -511,6 +517,12 @@ class StableDiffusionBoxDiffPipeline(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -519,6 +531,12 @@ class StableDiffusionBoxDiffPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -526,6 +544,12 @@ class StableDiffusionBoxDiffPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def _encode_prompt(
|
||||
|
||||
@@ -471,6 +471,12 @@ class StableDiffusionPAGPipeline(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -478,6 +484,12 @@ class StableDiffusionPAGPipeline(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -486,6 +498,12 @@ class StableDiffusionPAGPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -493,6 +511,12 @@ class StableDiffusionPAGPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def _encode_prompt(
|
||||
|
||||
@@ -26,7 +26,7 @@ from diffusers.models import AutoencoderKLHunyuanVideo, HunyuanVideoTransformer3
|
||||
from diffusers.pipelines.hunyuan_video.pipeline_output import HunyuanVideoPipelineOutput
|
||||
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
|
||||
from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from diffusers.utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from diffusers.utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from diffusers.utils.torch_utils import randn_tensor
|
||||
from diffusers.video_processor import VideoProcessor
|
||||
|
||||
@@ -481,6 +481,12 @@ class HunyuanVideoSTGPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -488,6 +494,12 @@ class HunyuanVideoSTGPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -496,6 +508,12 @@ class HunyuanVideoSTGPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -503,6 +521,12 @@ class HunyuanVideoSTGPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
@property
|
||||
|
||||
@@ -26,11 +26,7 @@ from diffusers.models import AutoencoderKLMochi, MochiTransformer3DModel
|
||||
from diffusers.pipelines.mochi.pipeline_output import MochiPipelineOutput
|
||||
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
|
||||
from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from diffusers.utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from diffusers.utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from diffusers.utils.torch_utils import randn_tensor
|
||||
from diffusers.video_processor import VideoProcessor
|
||||
|
||||
@@ -458,6 +454,12 @@ class MochiSTGPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -465,6 +467,12 @@ class MochiSTGPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -473,6 +481,12 @@ class MochiSTGPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -480,6 +494,12 @@ class MochiSTGPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def prepare_latents(
|
||||
|
||||
@@ -263,6 +263,12 @@ class PromptDiffusionPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
|
||||
@@ -271,6 +277,12 @@ class PromptDiffusionPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
|
||||
|
||||
@@ -22,6 +22,7 @@ from huggingface_hub.utils import validate_hf_hub_args
|
||||
from typing_extensions import Self
|
||||
|
||||
from .. import __version__
|
||||
from ..models.model_loading_utils import _caching_allocator_warmup, _determine_device_map, _expand_device_map
|
||||
from ..quantizers import DiffusersAutoQuantizer
|
||||
from ..utils import deprecate, is_accelerate_available, is_torch_version, logging
|
||||
from ..utils.torch_utils import empty_device_cache
|
||||
@@ -297,6 +298,7 @@ class FromOriginalModelMixin:
|
||||
low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
|
||||
device = kwargs.pop("device", None)
|
||||
disable_mmap = kwargs.pop("disable_mmap", False)
|
||||
device_map = kwargs.pop("device_map", None)
|
||||
|
||||
user_agent = {"diffusers": __version__, "file_type": "single_file", "framework": "pytorch"}
|
||||
# In order to ensure popular quantization methods are supported. Can be disabled with `disable_telemetry`
|
||||
@@ -403,19 +405,8 @@ class FromOriginalModelMixin:
|
||||
with ctx():
|
||||
model = cls.from_config(diffusers_model_config)
|
||||
|
||||
checkpoint_mapping_kwargs = _get_mapping_function_kwargs(checkpoint_mapping_fn, **kwargs)
|
||||
model_state_dict = model.state_dict()
|
||||
|
||||
if _should_convert_state_dict_to_diffusers(model.state_dict(), checkpoint):
|
||||
diffusers_format_checkpoint = checkpoint_mapping_fn(
|
||||
config=diffusers_model_config, checkpoint=checkpoint, **checkpoint_mapping_kwargs
|
||||
)
|
||||
else:
|
||||
diffusers_format_checkpoint = checkpoint
|
||||
|
||||
if not diffusers_format_checkpoint:
|
||||
raise SingleFileComponentError(
|
||||
f"Failed to load {mapping_class_name}. Weights for this component appear to be missing in the checkpoint."
|
||||
)
|
||||
# Check if `_keep_in_fp32_modules` is not None
|
||||
use_keep_in_fp32_modules = (cls._keep_in_fp32_modules is not None) and (
|
||||
(torch_dtype == torch.float16) or hasattr(hf_quantizer, "use_keep_in_fp32_modules")
|
||||
@@ -428,6 +419,26 @@ class FromOriginalModelMixin:
|
||||
else:
|
||||
keep_in_fp32_modules = []
|
||||
|
||||
# Now that the model is loaded, we can determine the `device_map`
|
||||
device_map = _determine_device_map(model, device_map, None, torch_dtype, keep_in_fp32_modules, hf_quantizer)
|
||||
if device_map is not None:
|
||||
expanded_device_map = _expand_device_map(device_map, model_state_dict.keys())
|
||||
_caching_allocator_warmup(model, expanded_device_map, torch_dtype, hf_quantizer)
|
||||
|
||||
checkpoint_mapping_kwargs = _get_mapping_function_kwargs(checkpoint_mapping_fn, **kwargs)
|
||||
|
||||
if _should_convert_state_dict_to_diffusers(model_state_dict, checkpoint):
|
||||
diffusers_format_checkpoint = checkpoint_mapping_fn(
|
||||
config=diffusers_model_config, checkpoint=checkpoint, **checkpoint_mapping_kwargs
|
||||
)
|
||||
else:
|
||||
diffusers_format_checkpoint = checkpoint
|
||||
|
||||
if not diffusers_format_checkpoint:
|
||||
raise SingleFileComponentError(
|
||||
f"Failed to load {mapping_class_name}. Weights for this component appear to be missing in the checkpoint."
|
||||
)
|
||||
|
||||
if hf_quantizer is not None:
|
||||
hf_quantizer.preprocess_model(
|
||||
model=model,
|
||||
|
||||
@@ -17,10 +17,11 @@ import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
|
||||
from ..utils import deprecate
|
||||
from ..utils.import_utils import is_torch_npu_available, is_torch_version
|
||||
from ..utils import deprecate, get_logger, is_torch_npu_available, is_torch_version
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
if is_torch_npu_available():
|
||||
import torch_npu
|
||||
|
||||
@@ -31,6 +32,7 @@ ACT2CLS = {
|
||||
"gelu": nn.GELU,
|
||||
"relu": nn.ReLU,
|
||||
}
|
||||
KERNELS_REPO_ID = "kernels-community/activation"
|
||||
|
||||
|
||||
def get_activation(act_fn: str) -> nn.Module:
|
||||
@@ -90,6 +92,27 @@ class GELU(nn.Module):
|
||||
return hidden_states
|
||||
|
||||
|
||||
# TODO: validation checks / consider making Python classes of activations like `transformers`
|
||||
# All of these are temporary for now.
|
||||
class CUDAOptimizedGELU(GELU):
    r"""
    GELU variant that applies a Hub-provided activation kernel to the output of the `proj` projection.

    The kernel layers are fetched with `kernels.get_kernel` from the `kernels-community/activation` repository
    (revision `add_more_act`). All constructor arguments are forwarded unchanged to [`GELU`].

    Parameters:
        *args, **kwargs:
            Forwarded to `GELU.__init__`. `approximate` selects the kernel layer: `"none"` uses
            `activation.layers.Gelu` and `"tanh"` uses `activation.layers.GeluTanh`.

    Raises:
        ValueError: If `approximate` is neither `"none"` nor `"tanh"`.
    """

    def __init__(self, *args, **kwargs):
        # Imported lazily so that `kernels` is only required when this class is actually instantiated.
        from kernels import get_kernel

        super().__init__(*args, **kwargs)

        # Prefer the value recorded by the parent constructor, if it stores one, so that an `approximate` passed
        # positionally is honoured as well; otherwise fall back to the keyword argument.
        # NOTE(review): assumes `GELU.__init__` keeps the setting on `self.approximate` -- confirm.
        approximate = getattr(self, "approximate", kwargs.get("approximate", "none"))

        # Validate before fetching the kernel: an unsupported value used to leave `self.act_fn` unset, which only
        # surfaced later as an `AttributeError` inside `forward`.
        if approximate not in ("none", "tanh"):
            raise ValueError(
                f"Unsupported `approximate` value {approximate!r} for `{self.__class__.__name__}`. "
                "Expected 'none' or 'tanh'."
            )

        activation = get_kernel("kernels-community/activation", revision="add_more_act")
        if approximate == "none":
            self.act_fn = activation.layers.Gelu()
        else:
            self.act_fn = activation.layers.GeluTanh()

    def forward(self, hidden_states):
        """Project `hidden_states` with `self.proj`, then apply the selected kernel activation."""
        hidden_states = self.proj(hidden_states)
        hidden_states = self.act_fn(hidden_states)
        return hidden_states
|
||||
|
||||
|
||||
class GEGLU(nn.Module):
|
||||
r"""
|
||||
A [variant](https://huggingface.co/papers/2002.05202) of the gated linear unit activation function.
|
||||
|
||||
@@ -20,11 +20,20 @@ import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
from ..utils import is_torch_npu_available, is_torch_version
|
||||
from ..utils import is_kernels_available, is_torch_npu_available, is_torch_version
|
||||
from ..utils.constants import DIFFUSERS_ENABLE_HUB_KERNELS
|
||||
from ..utils.kernels_utils import use_kernel_forward_from_hub
|
||||
from .activations import get_activation
|
||||
from .embeddings import CombinedTimestepLabelEmbeddings, PixArtAlphaCombinedTimestepSizeEmbeddings
|
||||
|
||||
|
||||
if DIFFUSERS_ENABLE_HUB_KERNELS:
    # Fail at import time with an actionable message. Previously a missing `kernels` package silently skipped this
    # block, leaving `silu_kernel` undefined, and the norm layers in this module then raised a `NameError` when
    # constructed with hub kernels enabled.
    if not is_kernels_available():
        raise ImportError(
            "`DIFFUSERS_ENABLE_HUB_KERNELS` is enabled but the `kernels` library is not installed. "
            "Install it with `pip install kernels`."
        )

    from kernels import get_kernel

    # Layer classes exposed by the Hub activation kernel repository; `silu_kernel` is instantiated by the norm
    # layers in place of `nn.SiLU` when hub kernels are enabled.
    activation = get_kernel("kernels-community/activation", revision="add_more_act")
    silu_kernel = activation.layers.Silu
|
||||
|
||||
|
||||
class AdaLayerNorm(nn.Module):
|
||||
r"""
|
||||
Norm layer modified to incorporate timestep embeddings.
|
||||
@@ -57,7 +66,10 @@ class AdaLayerNorm(nn.Module):
|
||||
else:
|
||||
self.emb = None
|
||||
|
||||
self.silu = nn.SiLU()
|
||||
if DIFFUSERS_ENABLE_HUB_KERNELS:
|
||||
self.silu = silu_kernel()
|
||||
else:
|
||||
self.silu = nn.SiLU()
|
||||
self.linear = nn.Linear(embedding_dim, output_dim)
|
||||
self.norm = nn.LayerNorm(output_dim // 2, norm_eps, norm_elementwise_affine)
|
||||
|
||||
@@ -144,7 +156,10 @@ class AdaLayerNormZero(nn.Module):
|
||||
else:
|
||||
self.emb = None
|
||||
|
||||
self.silu = nn.SiLU()
|
||||
if DIFFUSERS_ENABLE_HUB_KERNELS:
|
||||
self.silu = silu_kernel()
|
||||
else:
|
||||
self.silu = nn.SiLU()
|
||||
self.linear = nn.Linear(embedding_dim, 6 * embedding_dim, bias=bias)
|
||||
if norm_type == "layer_norm":
|
||||
self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False, eps=1e-6)
|
||||
@@ -183,7 +198,10 @@ class AdaLayerNormZeroSingle(nn.Module):
|
||||
def __init__(self, embedding_dim: int, norm_type="layer_norm", bias=True):
|
||||
super().__init__()
|
||||
|
||||
self.silu = nn.SiLU()
|
||||
if DIFFUSERS_ENABLE_HUB_KERNELS:
|
||||
self.silu = silu_kernel()
|
||||
else:
|
||||
self.silu = nn.SiLU()
|
||||
self.linear = nn.Linear(embedding_dim, 3 * embedding_dim, bias=bias)
|
||||
if norm_type == "layer_norm":
|
||||
self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False, eps=1e-6)
|
||||
@@ -335,7 +353,10 @@ class AdaLayerNormContinuous(nn.Module):
|
||||
norm_type="layer_norm",
|
||||
):
|
||||
super().__init__()
|
||||
self.silu = nn.SiLU()
|
||||
if DIFFUSERS_ENABLE_HUB_KERNELS:
|
||||
self.silu = silu_kernel()
|
||||
else:
|
||||
self.silu = nn.SiLU()
|
||||
self.linear = nn.Linear(conditioning_embedding_dim, embedding_dim * 2, bias=bias)
|
||||
if norm_type == "layer_norm":
|
||||
self.norm = LayerNorm(embedding_dim, eps, elementwise_affine, bias)
|
||||
@@ -508,6 +529,7 @@ else:
|
||||
return F.layer_norm(input, self.dim, self.weight, self.bias, self.eps)
|
||||
|
||||
|
||||
@use_kernel_forward_from_hub("RMSNorm")
|
||||
class RMSNorm(nn.Module):
|
||||
r"""
|
||||
RMS Norm as introduced in https://huggingface.co/papers/1910.07467 by Zhang et al.
|
||||
|
||||
@@ -22,7 +22,8 @@ import torch.nn.functional as F
|
||||
|
||||
from ...configuration_utils import ConfigMixin, register_to_config
|
||||
from ...loaders import FluxTransformer2DLoadersMixin, FromOriginalModelMixin, PeftAdapterMixin
|
||||
from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
|
||||
from ...utils import USE_PEFT_BACKEND, is_kernels_available, logging, scale_lora_layers, unscale_lora_layers
|
||||
from ...utils.constants import DIFFUSERS_ENABLE_HUB_KERNELS
|
||||
from ...utils.torch_utils import maybe_allow_in_graph
|
||||
from ..attention import AttentionMixin, AttentionModuleMixin, FeedForward
|
||||
from ..attention_dispatch import dispatch_attention_fn
|
||||
@@ -40,6 +41,12 @@ from ..normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNo
|
||||
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
if DIFFUSERS_ENABLE_HUB_KERNELS:
    # Fail at import time with an actionable message instead of silently leaving `gelu_tanh_kernel` undefined when
    # hub kernels are enabled but the `kernels` package is missing.
    if not is_kernels_available():
        raise ImportError(
            "`DIFFUSERS_ENABLE_HUB_KERNELS` is enabled but the `kernels` library is not installed. "
            "Install it with `pip install kernels`."
        )

    from kernels import get_kernel

    # Tanh-approximated GELU layer class from the Hub activation kernel repository.
    activation = get_kernel("kernels-community/activation", revision="add_more_act")
    gelu_tanh_kernel = activation.layers.GeluTanh
|
||||
|
||||
|
||||
def _get_projections(attn: "FluxAttention", hidden_states, encoder_hidden_states=None):
|
||||
query = attn.to_q(hidden_states)
|
||||
@@ -300,8 +307,14 @@ class FluxAttention(torch.nn.Module, AttentionModuleMixin):
|
||||
self.added_kv_proj_dim = added_kv_proj_dim
|
||||
self.added_proj_bias = added_proj_bias
|
||||
|
||||
self.norm_q = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine)
|
||||
self.norm_k = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine)
|
||||
if DIFFUSERS_ENABLE_HUB_KERNELS:
|
||||
from ..normalization import RMSNorm
|
||||
|
||||
self.norm_q = RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine)
|
||||
self.norm_k = RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine)
|
||||
else:
|
||||
self.norm_q = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine)
|
||||
self.norm_k = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine)
|
||||
self.to_q = torch.nn.Linear(query_dim, self.inner_dim, bias=bias)
|
||||
self.to_k = torch.nn.Linear(query_dim, self.inner_dim, bias=bias)
|
||||
self.to_v = torch.nn.Linear(query_dim, self.inner_dim, bias=bias)
|
||||
@@ -312,8 +325,14 @@ class FluxAttention(torch.nn.Module, AttentionModuleMixin):
|
||||
self.to_out.append(torch.nn.Dropout(dropout))
|
||||
|
||||
if added_kv_proj_dim is not None:
|
||||
self.norm_added_q = torch.nn.RMSNorm(dim_head, eps=eps)
|
||||
self.norm_added_k = torch.nn.RMSNorm(dim_head, eps=eps)
|
||||
if DIFFUSERS_ENABLE_HUB_KERNELS:
|
||||
from ..normalization import RMSNorm
|
||||
|
||||
self.norm_added_q = RMSNorm(dim_head, eps=eps)
|
||||
self.norm_added_k = RMSNorm(dim_head, eps=eps)
|
||||
else:
|
||||
self.norm_added_q = torch.nn.RMSNorm(dim_head, eps=eps)
|
||||
self.norm_added_k = torch.nn.RMSNorm(dim_head, eps=eps)
|
||||
self.add_q_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias)
|
||||
self.add_k_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias)
|
||||
self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias)
|
||||
@@ -351,6 +370,11 @@ class FluxSingleTransformerBlock(nn.Module):
|
||||
self.norm = AdaLayerNormZeroSingle(dim)
|
||||
self.proj_mlp = nn.Linear(dim, self.mlp_hidden_dim)
|
||||
self.act_mlp = nn.GELU(approximate="tanh")
|
||||
# if not DIFFUSERS_ENABLE_HUB_KERNELS:
|
||||
# self.act_mlp = nn.GELU(approximate="tanh")
|
||||
# else:
|
||||
# self.act_mlp = gelu_tanh_kernel()
|
||||
|
||||
self.proj_out = nn.Linear(dim + self.mlp_hidden_dim, dim)
|
||||
|
||||
self.attn = FluxAttention(
|
||||
|
||||
@@ -454,6 +454,9 @@ class FluxImg2ImgSetTimestepsStep(ModularPipelineBlocks):
|
||||
block_state = self.get_block_state(state)
|
||||
block_state.device = components._execution_device
|
||||
|
||||
block_state.height = block_state.height or components.default_height
|
||||
block_state.width = block_state.width or components.default_width
|
||||
|
||||
scheduler = components.scheduler
|
||||
transformer = components.transformer
|
||||
batch_size = block_state.batch_size * block_state.num_images_per_prompt
|
||||
@@ -659,8 +662,6 @@ class FluxImg2ImgPrepareLatentsStep(ModularPipelineBlocks):
|
||||
def __call__(self, components: FluxModularPipeline, state: PipelineState) -> PipelineState:
|
||||
block_state = self.get_block_state(state)
|
||||
|
||||
block_state.height = block_state.height or components.default_height
|
||||
block_state.width = block_state.width or components.default_width
|
||||
block_state.device = components._execution_device
|
||||
block_state.dtype = torch.bfloat16 # TODO: okay to hardcode this?
|
||||
block_state.num_channels_latents = components.num_channels_latents
|
||||
|
||||
@@ -148,8 +148,8 @@ TEXT2IMAGE_BLOCKS = InsertableDict(
|
||||
[
|
||||
("text_encoder", FluxTextEncoderStep),
|
||||
("input", FluxInputStep),
|
||||
("set_timesteps", FluxSetTimestepsStep),
|
||||
("prepare_latents", FluxPrepareLatentsStep),
|
||||
("set_timesteps", FluxSetTimestepsStep),
|
||||
("denoise", FluxDenoiseStep),
|
||||
("decode", FluxDecodeStep),
|
||||
]
|
||||
|
||||
@@ -651,6 +651,12 @@ class AllegroPipeline(DiffusionPipeline):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -658,6 +664,12 @@ class AllegroPipeline(DiffusionPipeline):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -666,6 +678,12 @@ class AllegroPipeline(DiffusionPipeline):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -673,6 +691,12 @@ class AllegroPipeline(DiffusionPipeline):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
@property
|
||||
|
||||
@@ -34,6 +34,7 @@ from transformers import (
|
||||
from ...models import AutoencoderKL
|
||||
from ...schedulers import KarrasDiffusionSchedulers
|
||||
from ...utils import (
|
||||
deprecate,
|
||||
is_accelerate_available,
|
||||
is_accelerate_version,
|
||||
is_librosa_available,
|
||||
@@ -228,6 +229,12 @@ class AudioLDM2Pipeline(DiffusionPipeline):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
# Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.disable_vae_slicing
|
||||
@@ -236,6 +243,12 @@ class AudioLDM2Pipeline(DiffusionPipeline):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
|
||||
|
||||
@@ -19,11 +19,7 @@ from transformers import CLIPTokenizer
|
||||
|
||||
from ...models import AutoencoderKL, UNet2DConditionModel
|
||||
from ...schedulers import PNDMScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
|
||||
from .blip_image_processing import BlipImageProcessor
|
||||
|
||||
@@ -25,6 +25,7 @@ from ...models import AutoencoderKL, ChromaTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
@@ -508,6 +509,12 @@ class ChromaPipeline(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -515,6 +522,12 @@ class ChromaPipeline(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -523,6 +536,12 @@ class ChromaPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -530,6 +549,12 @@ class ChromaPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.prepare_latents
|
||||
|
||||
@@ -25,6 +25,7 @@ from ...models import AutoencoderKL, ChromaTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
@@ -542,6 +543,12 @@ class ChromaImg2ImgPipeline(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -549,6 +556,12 @@ class ChromaImg2ImgPipeline(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -557,6 +570,12 @@ class ChromaImg2ImgPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -564,6 +583,12 @@ class ChromaImg2ImgPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3_img2img.StableDiffusion3Img2ImgPipeline.get_timesteps
|
||||
|
||||
@@ -28,11 +28,7 @@ from ...models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel
|
||||
from ...models.embeddings import get_3d_rotary_pos_embed
|
||||
from ...pipelines.pipeline_utils import DiffusionPipeline
|
||||
from ...schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ...video_processor import VideoProcessor
|
||||
from .pipeline_output import CogVideoXPipelineOutput
|
||||
|
||||
@@ -18,11 +18,7 @@ import torch
|
||||
|
||||
from ...models import UNet2DModel
|
||||
from ...schedulers import CMStochasticIterativeScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
||||
|
||||
|
||||
@@ -20,11 +20,7 @@ from transformers import CLIPTokenizer
|
||||
|
||||
from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
|
||||
from ...schedulers import PNDMScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..blip_diffusion.blip_image_processing import BlipImageProcessor
|
||||
from ..blip_diffusion.modeling_blip2 import Blip2QFormerModel
|
||||
|
||||
@@ -27,11 +27,7 @@ from ...models import AutoencoderKL, HunyuanDiT2DControlNetModel, HunyuanDiT2DMo
|
||||
from ...models.embeddings import get_2d_rotary_pos_embed
|
||||
from ...pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
from ...schedulers import DDPMScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@ from ...models import AutoencoderKL, FluxTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
@@ -545,6 +546,12 @@ class FluxPipeline(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -552,6 +559,12 @@ class FluxPipeline(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -560,6 +573,12 @@ class FluxPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -567,6 +586,12 @@ class FluxPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def prepare_latents(
|
||||
|
||||
@@ -26,6 +26,7 @@ from ...models.transformers import FluxTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
@@ -496,6 +497,12 @@ class FluxControlPipeline(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -503,6 +510,12 @@ class FluxControlPipeline(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -511,6 +524,12 @@ class FluxControlPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -518,6 +537,12 @@ class FluxControlPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.prepare_latents
|
||||
|
||||
@@ -35,6 +35,7 @@ from ...models.transformers import FluxTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
@@ -577,6 +578,12 @@ class FluxControlInpaintPipeline(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -584,6 +591,12 @@ class FluxControlInpaintPipeline(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -592,6 +605,12 @@ class FluxControlInpaintPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -599,6 +618,12 @@ class FluxControlInpaintPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def prepare_latents(
|
||||
|
||||
@@ -26,6 +26,7 @@ from ...models.transformers import FluxTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
@@ -633,6 +634,12 @@ class FluxFillPipeline(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -640,6 +647,12 @@ class FluxFillPipeline(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -648,6 +661,12 @@ class FluxFillPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -655,6 +674,12 @@ class FluxFillPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux_img2img.FluxImg2ImgPipeline.prepare_latents
|
||||
|
||||
@@ -33,6 +33,7 @@ from ...models.transformers import FluxTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
@@ -613,6 +614,12 @@ class FluxImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_slicing
|
||||
@@ -621,6 +628,12 @@ class FluxImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.enable_vae_tiling
|
||||
@@ -630,6 +643,12 @@ class FluxImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_tiling
|
||||
@@ -638,6 +657,12 @@ class FluxImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def prepare_latents(
|
||||
|
||||
@@ -32,6 +32,7 @@ from ...models import AutoencoderKL, FluxTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
@@ -614,6 +615,12 @@ class FluxKontextPipeline(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_slicing
|
||||
@@ -622,6 +629,12 @@ class FluxKontextPipeline(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.enable_vae_tiling
|
||||
@@ -631,6 +644,12 @@ class FluxKontextPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_tiling
|
||||
@@ -639,6 +658,12 @@ class FluxKontextPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def prepare_latents(
|
||||
|
||||
@@ -22,6 +22,7 @@ from ...models import AutoencoderKL, FluxTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
@@ -688,6 +689,12 @@ class FluxKontextInpaintPipeline(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_slicing
|
||||
@@ -696,6 +703,12 @@ class FluxKontextInpaintPipeline(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.enable_vae_tiling
|
||||
@@ -705,6 +718,12 @@ class FluxKontextInpaintPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_tiling
|
||||
@@ -713,6 +732,12 @@ class FluxKontextInpaintPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def prepare_latents(
|
||||
|
||||
@@ -522,6 +522,12 @@ class HiDreamImagePipeline(DiffusionPipeline, HiDreamImageLoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -529,6 +535,12 @@ class HiDreamImagePipeline(DiffusionPipeline, HiDreamImageLoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -537,6 +549,12 @@ class HiDreamImagePipeline(DiffusionPipeline, HiDreamImageLoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -544,6 +562,12 @@ class HiDreamImagePipeline(DiffusionPipeline, HiDreamImageLoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def check_inputs(
|
||||
|
||||
@@ -24,7 +24,7 @@ from ...image_processor import PipelineImageInput
|
||||
from ...loaders import HunyuanVideoLoraLoaderMixin
|
||||
from ...models import AutoencoderKLHunyuanVideo, HunyuanVideoTransformer3DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ...video_processor import VideoProcessor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
@@ -463,6 +463,12 @@ class HunyuanSkyreelsImageToVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoa
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -470,6 +476,12 @@ class HunyuanSkyreelsImageToVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoa
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -478,6 +490,12 @@ class HunyuanSkyreelsImageToVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoa
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -485,6 +503,12 @@ class HunyuanSkyreelsImageToVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoa
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
@property
|
||||
|
||||
@@ -23,7 +23,7 @@ from ...callbacks import MultiPipelineCallbacks, PipelineCallback
|
||||
from ...loaders import HunyuanVideoLoraLoaderMixin
|
||||
from ...models import AutoencoderKLHunyuanVideo, HunyuanVideoTransformer3DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ...video_processor import VideoProcessor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
@@ -420,6 +420,12 @@ class HunyuanVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -427,6 +433,12 @@ class HunyuanVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -435,6 +447,12 @@ class HunyuanVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -442,6 +460,12 @@ class HunyuanVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
@property
|
||||
|
||||
@@ -33,7 +33,7 @@ from ...image_processor import PipelineImageInput
|
||||
from ...loaders import HunyuanVideoLoraLoaderMixin
|
||||
from ...models import AutoencoderKLHunyuanVideo, HunyuanVideoFramepackTransformer3DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ...video_processor import VideoProcessor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
@@ -570,6 +570,12 @@ class HunyuanVideoFramepackPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMix
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -577,6 +583,12 @@ class HunyuanVideoFramepackPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMix
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -585,6 +597,12 @@ class HunyuanVideoFramepackPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMix
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -592,6 +610,12 @@ class HunyuanVideoFramepackPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMix
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
@property
|
||||
|
||||
@@ -30,7 +30,7 @@ from ...callbacks import MultiPipelineCallbacks, PipelineCallback
|
||||
from ...loaders import HunyuanVideoLoraLoaderMixin
|
||||
from ...models import AutoencoderKLHunyuanVideo, HunyuanVideoTransformer3DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ...video_processor import VideoProcessor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
@@ -598,6 +598,12 @@ class HunyuanVideoImageToVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoader
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -605,6 +611,12 @@ class HunyuanVideoImageToVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoader
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -613,6 +625,12 @@ class HunyuanVideoImageToVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoader
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -620,6 +638,12 @@ class HunyuanVideoImageToVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoader
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
@property
|
||||
|
||||
@@ -27,11 +27,7 @@ from ...models import AutoencoderKL, HunyuanDiT2DModel
|
||||
from ...models.embeddings import get_2d_rotary_pos_embed
|
||||
from ...pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
from ...schedulers import DDPMScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
|
||||
|
||||
@@ -21,11 +21,7 @@ from transformers import (
|
||||
|
||||
from ...models import UNet2DConditionModel, VQModel
|
||||
from ...schedulers import DDIMScheduler, DDPMScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
||||
from .text_encoder import MultilingualCLIP
|
||||
|
||||
@@ -23,11 +23,7 @@ from transformers import (
|
||||
from ...image_processor import VaeImageProcessor
|
||||
from ...models import UNet2DConditionModel, VQModel
|
||||
from ...schedulers import DDIMScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
||||
from .text_encoder import MultilingualCLIP
|
||||
|
||||
@@ -28,11 +28,7 @@ from transformers import (
|
||||
from ... import __version__
|
||||
from ...models import UNet2DConditionModel, VQModel
|
||||
from ...schedulers import DDIMScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
||||
from .text_encoder import MultilingualCLIP
|
||||
|
||||
@@ -6,11 +6,7 @@ from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTo
|
||||
|
||||
from ...models import PriorTransformer
|
||||
from ...schedulers import UnCLIPScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..kandinsky import KandinskyPriorPipelineOutput
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
|
||||
@@ -6,11 +6,7 @@ from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTo
|
||||
|
||||
from ...models import PriorTransformer
|
||||
from ...schedulers import UnCLIPScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..kandinsky import KandinskyPriorPipelineOutput
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
|
||||
@@ -722,6 +722,12 @@ class LEditsPPPipelineStableDiffusion(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -729,6 +735,12 @@ class LEditsPPPipelineStableDiffusion(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -737,6 +749,12 @@ class LEditsPPPipelineStableDiffusion(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -744,6 +762,12 @@ class LEditsPPPipelineStableDiffusion(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
@torch.no_grad()
|
||||
|
||||
@@ -44,6 +44,7 @@ from ...models.lora import adjust_lora_scale_text_encoder
|
||||
from ...schedulers import DDIMScheduler, DPMSolverMultistepScheduler
|
||||
from ...utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_invisible_watermark_available,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
@@ -770,6 +771,12 @@ class LEditsPPPipelineStableDiffusionXL(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -777,6 +784,12 @@ class LEditsPPPipelineStableDiffusionXL(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -785,6 +798,12 @@ class LEditsPPPipelineStableDiffusionXL(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -792,6 +811,12 @@ class LEditsPPPipelineStableDiffusionXL(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.ledits_pp.pipeline_leditspp_stable_diffusion.LEditsPPPipelineStableDiffusion.prepare_unet
|
||||
|
||||
@@ -18,7 +18,7 @@ import torch
|
||||
|
||||
from ...image_processor import PipelineImageInput
|
||||
from ...models import AutoencoderKLLTXVideo
|
||||
from ...utils import get_logger
|
||||
from ...utils import deprecate, get_logger
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ...video_processor import VideoProcessor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
@@ -148,6 +148,12 @@ class LTXLatentUpsamplePipeline(DiffusionPipeline):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -155,6 +161,12 @@ class LTXLatentUpsamplePipeline(DiffusionPipeline):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -163,6 +175,12 @@ class LTXLatentUpsamplePipeline(DiffusionPipeline):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -170,6 +188,12 @@ class LTXLatentUpsamplePipeline(DiffusionPipeline):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def check_inputs(self, video, height, width, latents):
|
||||
|
||||
@@ -433,6 +433,12 @@ class Lumina2Pipeline(DiffusionPipeline, Lumina2LoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -440,6 +446,12 @@ class Lumina2Pipeline(DiffusionPipeline, Lumina2LoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -448,6 +460,12 @@ class Lumina2Pipeline(DiffusionPipeline, Lumina2LoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -455,6 +473,12 @@ class Lumina2Pipeline(DiffusionPipeline, Lumina2LoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
||||
|
||||
@@ -23,11 +23,7 @@ from ...callbacks import MultiPipelineCallbacks, PipelineCallback
|
||||
from ...loaders import Mochi1LoraLoaderMixin
|
||||
from ...models import AutoencoderKLMochi, MochiTransformer3DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ...video_processor import VideoProcessor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
@@ -396,6 +392,12 @@ class MochiPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -403,6 +405,12 @@ class MochiPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -411,6 +419,12 @@ class MochiPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -418,6 +432,12 @@ class MochiPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def prepare_latents(
|
||||
|
||||
@@ -23,7 +23,7 @@ from ...image_processor import PipelineImageInput, VaeImageProcessor
|
||||
from ...models.autoencoders import AutoencoderKL
|
||||
from ...models.transformers import OmniGenTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import is_torch_xla_available, is_torchvision_available, logging, replace_example_docstring
|
||||
from ...utils import deprecate, is_torch_xla_available, is_torchvision_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
||||
|
||||
@@ -235,6 +235,12 @@ class OmniGenPipeline(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -242,6 +248,12 @@ class OmniGenPipeline(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -250,6 +262,12 @@ class OmniGenPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -257,6 +275,12 @@ class OmniGenPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3.StableDiffusion3Pipeline.prepare_latents
|
||||
|
||||
@@ -28,11 +28,7 @@ from ...models.attention_processor import PAGCFGHunyuanAttnProcessor2_0, PAGHuny
|
||||
from ...models.embeddings import get_2d_rotary_pos_embed
|
||||
from ...pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
from ...schedulers import DDPMScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from .pag_utils import PAGMixin
|
||||
|
||||
@@ -29,6 +29,7 @@ from ...models.attention_processor import PAGCFGSanaLinearAttnProcessor2_0, PAGI
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import (
|
||||
BACKENDS_MAPPING,
|
||||
deprecate,
|
||||
is_bs4_available,
|
||||
is_ftfy_available,
|
||||
is_torch_xla_available,
|
||||
@@ -190,6 +191,12 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -197,6 +204,12 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -205,6 +218,12 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -212,6 +231,12 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def encode_prompt(
|
||||
|
||||
@@ -57,6 +57,7 @@ from ..utils import (
|
||||
PushToHubMixin,
|
||||
_get_detailed_type,
|
||||
_is_valid_type,
|
||||
deprecate,
|
||||
is_accelerate_available,
|
||||
is_accelerate_version,
|
||||
is_hpu_available,
|
||||
@@ -504,6 +505,13 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
os.environ["PT_HPU_MAX_COMPOUND_OP_SIZE"] = "1"
|
||||
logger.debug("Environment variable set: PT_HPU_MAX_COMPOUND_OP_SIZE=1")
|
||||
|
||||
if dtype in (torch.bfloat16, None) and kwargs.pop("sdp_on_bf16", True):
|
||||
if hasattr(torch._C, "_set_math_sdp_allow_fp16_bf16_reduction"):
|
||||
torch._C._set_math_sdp_allow_fp16_bf16_reduction(True)
|
||||
logger.warning(
|
||||
"Enabled SDP with BF16 precision on HPU. To disable, please use `.to('hpu', sdp_on_bf16=False)`"
|
||||
)
|
||||
|
||||
module_names, _ = self._get_signature_keys(self)
|
||||
modules = [getattr(self, n, None) for n in module_names]
|
||||
modules = [m for m in modules if isinstance(m, torch.nn.Module)]
|
||||
@@ -1334,6 +1342,133 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
offload_buffers = len(model._parameters) > 0
|
||||
cpu_offload(model, device, offload_buffers=offload_buffers)
|
||||
|
||||
def enable_group_offload(
|
||||
self,
|
||||
onload_device: torch.device,
|
||||
offload_device: torch.device = torch.device("cpu"),
|
||||
offload_type: str = "block_level",
|
||||
num_blocks_per_group: Optional[int] = None,
|
||||
non_blocking: bool = False,
|
||||
use_stream: bool = False,
|
||||
record_stream: bool = False,
|
||||
low_cpu_mem_usage=False,
|
||||
offload_to_disk_path: Optional[str] = None,
|
||||
exclude_modules: Optional[Union[str, List[str]]] = None,
|
||||
) -> None:
|
||||
r"""
|
||||
Applies group offloading to the internal layers of each `torch.nn.Module` component of the pipeline. To understand what group offloading is,
|
||||
and where it is beneficial, we need to first provide some context on how other supported offloading methods
|
||||
work.
|
||||
|
||||
Typically, offloading is done at two levels:
|
||||
- Module-level: In Diffusers, this can be enabled using the `ModelMixin::enable_model_cpu_offload()` method. It
|
||||
works by offloading each component of a pipeline to the CPU for storage, and onloading to the accelerator
|
||||
device when needed for computation. This method is more memory-efficient than keeping all components on the
|
||||
accelerator, but the memory requirements are still quite high. For this method to work, one needs memory
|
||||
equivalent to size of the model in runtime dtype + size of largest intermediate activation tensors to be able
|
||||
to complete the forward pass.
|
||||
- Leaf-level: In Diffusers, this can be enabled using the `ModelMixin::enable_sequential_cpu_offload()` method.
|
||||
It works by offloading the lowest leaf-level parameters of the computation graph to the CPU for storage, and
|
||||
onloading only the leaves to the accelerator device for computation. This uses the lowest amount of accelerator
|
||||
memory, but can be slower due to the excessive number of device synchronizations.
|
||||
|
||||
Group offloading is a middle ground between the two methods. It works by offloading groups of internal layers,
|
||||
(either `torch.nn.ModuleList` or `torch.nn.Sequential`). This method uses lower memory than module-level
|
||||
offloading. It is also faster than leaf-level/sequential offloading, as the number of device synchronizations
|
||||
is reduced.
|
||||
|
||||
Another supported feature (for CUDA devices with support for asynchronous data transfer streams) is the ability
|
||||
to overlap data transfer and computation to reduce the overall execution time compared to sequential
|
||||
offloading. This is enabled using layer prefetching with streams, i.e., the layer that is to be executed next
|
||||
starts onloading to the accelerator device while the current layer is being executed - this increases the
|
||||
memory requirements slightly. Note that this implementation also supports leaf-level offloading but can be made
|
||||
much faster when using streams.
|
||||
|
||||
Args:
|
||||
onload_device (`torch.device`):
|
||||
The device to which the group of modules are onloaded.
|
||||
offload_device (`torch.device`, defaults to `torch.device("cpu")`):
|
||||
The device to which the group of modules are offloaded. This should typically be the CPU. Default is
|
||||
CPU.
|
||||
offload_type (`str` or `GroupOffloadingType`, defaults to "block_level"):
|
||||
The type of offloading to be applied. Can be one of "block_level" or "leaf_level". Default is
|
||||
"block_level".
|
||||
offload_to_disk_path (`str`, *optional*, defaults to `None`):
|
||||
The path to the directory where parameters will be offloaded. Setting this option can be useful in
|
||||
limited RAM environment settings where a reasonable speed-memory trade-off is desired.
|
||||
num_blocks_per_group (`int`, *optional*):
|
||||
The number of blocks per group when using offload_type="block_level". This is required when using
|
||||
offload_type="block_level".
|
||||
non_blocking (`bool`, defaults to `False`):
|
||||
If True, offloading and onloading is done with non-blocking data transfer.
|
||||
use_stream (`bool`, defaults to `False`):
|
||||
If True, offloading and onloading is done asynchronously using a CUDA stream. This can be useful for
|
||||
overlapping computation and data transfer.
|
||||
record_stream (`bool`, defaults to `False`): When enabled with `use_stream`, it marks the current tensor
|
||||
as having been used by this stream. It is faster at the expense of slightly more memory usage. Refer to
|
||||
the [PyTorch official docs](https://pytorch.org/docs/stable/generated/torch.Tensor.record_stream.html)
|
||||
for more details.
|
||||
low_cpu_mem_usage (`bool`, defaults to `False`):
|
||||
If True, the CPU memory usage is minimized by pinning tensors on-the-fly instead of pre-pinning them.
|
||||
This option only matters when using streamed CPU offloading (i.e. `use_stream=True`). This can be
|
||||
useful when the CPU memory is a bottleneck but may counteract the benefits of using streams.
|
||||
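exclude_modules (`Union[str, List[str]]`, *optional*, defaults to `None`): Name, or list of names, of pipeline components to exclude from group offloading. Excluded components that are `torch.nn.Module` instances are moved to `onload_device` instead.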
|
||||
|
||||
Example:
|
||||
```python
|
||||
>>> from diffusers import DiffusionPipeline
|
||||
>>> import torch
|
||||
|
||||
>>> pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image", torch_dtype=torch.bfloat16)
|
||||
|
||||
>>> pipe.enable_group_offload(
|
||||
... onload_device=torch.device("cuda"),
|
||||
... offload_device=torch.device("cpu"),
|
||||
... offload_type="leaf_level",
|
||||
... use_stream=True,
|
||||
... )
|
||||
>>> image = pipe("a beautiful sunset").images[0]
|
||||
```
|
||||
"""
|
||||
from ..hooks import apply_group_offloading
|
||||
|
||||
if isinstance(exclude_modules, str):
|
||||
exclude_modules = [exclude_modules]
|
||||
elif exclude_modules is None:
|
||||
exclude_modules = []
|
||||
|
||||
unknown = set(exclude_modules) - self.components.keys()
|
||||
if unknown:
|
||||
logger.info(
|
||||
f"The following modules are not present in pipeline: {', '.join(unknown)}. Ignore if this is expected."
|
||||
)
|
||||
|
||||
group_offload_kwargs = {
|
||||
"onload_device": onload_device,
|
||||
"offload_device": offload_device,
|
||||
"offload_type": offload_type,
|
||||
"num_blocks_per_group": num_blocks_per_group,
|
||||
"non_blocking": non_blocking,
|
||||
"use_stream": use_stream,
|
||||
"record_stream": record_stream,
|
||||
"low_cpu_mem_usage": low_cpu_mem_usage,
|
||||
"offload_to_disk_path": offload_to_disk_path,
|
||||
}
|
||||
for name, component in self.components.items():
|
||||
if name not in exclude_modules and isinstance(component, torch.nn.Module):
|
||||
if hasattr(component, "enable_group_offload"):
|
||||
component.enable_group_offload(**group_offload_kwargs)
|
||||
else:
|
||||
apply_group_offloading(module=component, **group_offload_kwargs)
|
||||
|
||||
if exclude_modules:
|
||||
for module_name in exclude_modules:
|
||||
module = getattr(self, module_name, None)
|
||||
if module is not None and isinstance(module, torch.nn.Module):
|
||||
module.to(onload_device)
|
||||
logger.debug(f"Placed `{module_name}` on {onload_device} device as it was in `exclude_modules`.")
|
||||
|
||||
def reset_device_map(self):
|
||||
r"""
|
||||
Resets the device maps (if any) to None.
|
||||
@@ -2074,6 +2209,12 @@ class StableDiffusionMixin:
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -2081,6 +2222,12 @@ class StableDiffusionMixin:
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -2089,6 +2236,12 @@ class StableDiffusionMixin:
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -2096,6 +2249,12 @@ class StableDiffusionMixin:
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
|
||||
|
||||
@@ -23,7 +23,7 @@ from ...image_processor import VaeImageProcessor
|
||||
from ...loaders import QwenImageLoraLoaderMixin
|
||||
from ...models import AutoencoderKLQwenImage, QwenImageTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from .pipeline_output import QwenImagePipelineOutput
|
||||
@@ -348,6 +348,12 @@ class QwenImagePipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -355,6 +361,12 @@ class QwenImagePipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -363,6 +375,12 @@ class QwenImagePipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -370,6 +388,12 @@ class QwenImagePipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def prepare_latents(
|
||||
|
||||
@@ -24,7 +24,7 @@ from ...loaders import QwenImageLoraLoaderMixin
|
||||
from ...models import AutoencoderKLQwenImage, QwenImageTransformer2DModel
|
||||
from ...models.controlnets.controlnet_qwenimage import QwenImageControlNetModel, QwenImageMultiControlNetModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from .pipeline_output import QwenImagePipelineOutput
|
||||
@@ -265,7 +265,7 @@ class QwenImageControlNetPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
txt = [template.format(e) for e in prompt]
|
||||
txt_tokens = self.tokenizer(
|
||||
txt, max_length=self.tokenizer_max_length + drop_idx, padding=True, truncation=True, return_tensors="pt"
|
||||
).to(self.device)
|
||||
).to(device)
|
||||
encoder_hidden_states = self.text_encoder(
|
||||
input_ids=txt_tokens.input_ids,
|
||||
attention_mask=txt_tokens.attention_mask,
|
||||
@@ -412,6 +412,12 @@ class QwenImageControlNetPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -419,6 +425,12 @@ class QwenImageControlNetPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -427,6 +439,12 @@ class QwenImageControlNetPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -434,6 +452,12 @@ class QwenImageControlNetPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.QwenImagePipeline.prepare_latents
|
||||
|
||||
@@ -24,7 +24,7 @@ from ...image_processor import PipelineImageInput, VaeImageProcessor
|
||||
from ...loaders import QwenImageLoraLoaderMixin
|
||||
from ...models import AutoencoderKLQwenImage, QwenImageTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from .pipeline_output import QwenImagePipelineOutput
|
||||
@@ -421,6 +421,12 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -428,6 +434,12 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -436,6 +448,12 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -443,6 +461,12 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def prepare_latents(
|
||||
|
||||
@@ -25,7 +25,7 @@ from ...image_processor import PipelineImageInput, VaeImageProcessor
|
||||
from ...loaders import QwenImageLoraLoaderMixin
|
||||
from ...models import AutoencoderKLQwenImage, QwenImageTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from .pipeline_output import QwenImagePipelineOutput
|
||||
@@ -466,6 +466,12 @@ class QwenImageEditInpaintPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -473,6 +479,12 @@ class QwenImageEditInpaintPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -481,6 +493,12 @@ class QwenImageEditInpaintPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -488,6 +506,12 @@ class QwenImageEditInpaintPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage_inpaint.QwenImageInpaintPipeline.prepare_latents
|
||||
|
||||
@@ -9,7 +9,7 @@ from ...image_processor import PipelineImageInput, VaeImageProcessor
|
||||
from ...loaders import QwenImageLoraLoaderMixin
|
||||
from ...models import AutoencoderKLQwenImage, QwenImageTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from .pipeline_output import QwenImagePipelineOutput
|
||||
@@ -397,6 +397,12 @@ class QwenImageImg2ImgPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -404,6 +410,12 @@ class QwenImageImg2ImgPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -412,6 +424,12 @@ class QwenImageImg2ImgPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -419,6 +437,12 @@ class QwenImageImg2ImgPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def prepare_latents(
|
||||
|
||||
@@ -10,7 +10,7 @@ from ...image_processor import PipelineImageInput, VaeImageProcessor
|
||||
from ...loaders import QwenImageLoraLoaderMixin
|
||||
from ...models import AutoencoderKLQwenImage, QwenImageTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
from .pipeline_output import QwenImagePipelineOutput
|
||||
@@ -424,6 +424,12 @@ class QwenImageInpaintPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -431,6 +437,12 @@ class QwenImageInpaintPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -439,6 +451,12 @@ class QwenImageInpaintPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -446,6 +464,12 @@ class QwenImageInpaintPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def prepare_latents(
|
||||
|
||||
@@ -30,6 +30,7 @@ from ...schedulers import DPMSolverMultistepScheduler
|
||||
from ...utils import (
|
||||
BACKENDS_MAPPING,
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_bs4_available,
|
||||
is_ftfy_available,
|
||||
is_torch_xla_available,
|
||||
@@ -224,6 +225,12 @@ class SanaPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -231,6 +238,12 @@ class SanaPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -239,6 +252,12 @@ class SanaPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -246,6 +265,12 @@ class SanaPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def _get_gemma_prompt_embeds(
|
||||
|
||||
@@ -30,6 +30,7 @@ from ...schedulers import DPMSolverMultistepScheduler
|
||||
from ...utils import (
|
||||
BACKENDS_MAPPING,
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_bs4_available,
|
||||
is_ftfy_available,
|
||||
is_torch_xla_available,
|
||||
@@ -237,6 +238,12 @@ class SanaControlNetPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -244,6 +251,12 @@ class SanaControlNetPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -252,6 +265,12 @@ class SanaControlNetPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -259,6 +278,12 @@ class SanaControlNetPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.sana.pipeline_sana.SanaPipeline._get_gemma_prompt_embeds
|
||||
|
||||
@@ -30,6 +30,7 @@ from ...schedulers import DPMSolverMultistepScheduler
|
||||
from ...utils import (
|
||||
BACKENDS_MAPPING,
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_bs4_available,
|
||||
is_ftfy_available,
|
||||
is_torch_xla_available,
|
||||
@@ -175,6 +176,12 @@ class SanaSprintPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -182,6 +189,12 @@ class SanaSprintPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -190,6 +203,12 @@ class SanaSprintPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -197,6 +216,12 @@ class SanaSprintPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.sana.pipeline_sana.SanaPipeline._get_gemma_prompt_embeds
|
||||
|
||||
@@ -31,6 +31,7 @@ from ...schedulers import DPMSolverMultistepScheduler
|
||||
from ...utils import (
|
||||
BACKENDS_MAPPING,
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_bs4_available,
|
||||
is_ftfy_available,
|
||||
is_torch_xla_available,
|
||||
@@ -183,6 +184,12 @@ class SanaSprintImg2ImgPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
# Copied from diffusers.pipelines.sana.pipeline_sana.SanaPipeline.disable_vae_slicing
|
||||
@@ -191,6 +198,12 @@ class SanaSprintImg2ImgPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
# Copied from diffusers.pipelines.sana.pipeline_sana.SanaPipeline.enable_vae_tiling
|
||||
@@ -200,6 +213,12 @@ class SanaSprintImg2ImgPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -207,6 +226,12 @@ class SanaSprintImg2ImgPipeline(DiffusionPipeline, SanaLoraLoaderMixin):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.sana.pipeline_sana.SanaPipeline._get_gemma_prompt_embeds
|
||||
|
||||
@@ -25,11 +25,7 @@ from transformers import (
|
||||
from ...models import AutoencoderOobleck, StableAudioDiTModel
|
||||
from ...models.embeddings import get_1d_rotary_pos_embed
|
||||
from ...schedulers import EDMDPMSolverMultistepScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
|
||||
from ...utils.torch_utils import randn_tensor
|
||||
from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline
|
||||
from .modeling_stable_audio import StableAudioProjectionModel
|
||||
@@ -134,6 +130,12 @@ class StableAudioPipeline(DiffusionPipeline):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
# Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.disable_vae_slicing
|
||||
@@ -142,6 +144,12 @@ class StableAudioPipeline(DiffusionPipeline):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def encode_prompt(
|
||||
|
||||
@@ -232,6 +232,12 @@ class UniDiffuserPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
# Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.disable_vae_slicing
|
||||
@@ -240,6 +246,12 @@ class UniDiffuserPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
# Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.enable_vae_tiling
|
||||
@@ -249,6 +261,12 @@ class UniDiffuserPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
# Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.disable_vae_tiling
|
||||
@@ -257,6 +275,12 @@ class UniDiffuserPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
# Functions to manually set the mode
|
||||
|
||||
@@ -22,11 +22,7 @@ from ...loaders import FluxLoraLoaderMixin, FromSingleFileMixin, TextualInversio
|
||||
from ...models.autoencoders import AutoencoderKL
|
||||
from ...models.transformers import FluxTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import (
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
)
|
||||
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
||||
from ..flux.pipeline_flux_fill import FluxFillPipeline as VisualClozeUpsamplingPipeline
|
||||
from ..flux.pipeline_output import FluxPipelineOutput
|
||||
from ..pipeline_utils import DiffusionPipeline
|
||||
|
||||
@@ -24,6 +24,7 @@ from ...models.transformers import FluxTransformer2DModel
|
||||
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
||||
from ...utils import (
|
||||
USE_PEFT_BACKEND,
|
||||
deprecate,
|
||||
is_torch_xla_available,
|
||||
logging,
|
||||
replace_example_docstring,
|
||||
@@ -524,6 +525,12 @@ class VisualClozeGenerationPipeline(
|
||||
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
||||
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`."
|
||||
deprecate(
|
||||
"enable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_slicing()
|
||||
|
||||
def disable_vae_slicing(self):
|
||||
@@ -531,6 +538,12 @@ class VisualClozeGenerationPipeline(
|
||||
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`."
|
||||
deprecate(
|
||||
"disable_vae_slicing",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_slicing()
|
||||
|
||||
def enable_vae_tiling(self):
|
||||
@@ -539,6 +552,12 @@ class VisualClozeGenerationPipeline(
|
||||
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
||||
processing larger images.
|
||||
"""
|
||||
depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`."
|
||||
deprecate(
|
||||
"enable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.enable_tiling()
|
||||
|
||||
def disable_vae_tiling(self):
|
||||
@@ -546,6 +565,12 @@ class VisualClozeGenerationPipeline(
|
||||
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
||||
computing decoding in one step.
|
||||
"""
|
||||
depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`."
|
||||
deprecate(
|
||||
"disable_vae_tiling",
|
||||
"0.40.0",
|
||||
depr_message,
|
||||
)
|
||||
self.vae.disable_tiling()
|
||||
|
||||
def _prepare_latents(self, image, mask, gen, vae_scale_factor, device, dtype):
|
||||
|
||||
@@ -110,7 +110,7 @@ class VisualClozeProcessor(VaeImageProcessor):
|
||||
new_h = int(processed_images[i][j].height * (new_w / processed_images[i][j].width))
|
||||
new_w = int(new_w / 16) * 16
|
||||
new_h = int(new_h / 16) * 16
|
||||
processed_images[i][j] = self.height(processed_images[i][j], new_h, new_w)
|
||||
processed_images[i][j] = self._resize_and_crop(processed_images[i][j], new_h, new_w)
|
||||
|
||||
# Convert to tensors and normalize
|
||||
image_sizes = []
|
||||
|
||||
@@ -48,12 +48,15 @@ class PipelineQuantizationConfig:
|
||||
self,
|
||||
quant_backend: str = None,
|
||||
quant_kwargs: Dict[str, Union[str, float, int, dict]] = None,
|
||||
components_to_quantize: Optional[List[str]] = None,
|
||||
components_to_quantize: Optional[Union[List[str], str]] = None,
|
||||
quant_mapping: Dict[str, Union[DiffQuantConfigMixin, "TransformersQuantConfigMixin"]] = None,
|
||||
):
|
||||
self.quant_backend = quant_backend
|
||||
# Initialize kwargs to be {} to set to the defaults.
|
||||
self.quant_kwargs = quant_kwargs or {}
|
||||
if components_to_quantize:
|
||||
if isinstance(components_to_quantize, str):
|
||||
components_to_quantize = [components_to_quantize]
|
||||
self.components_to_quantize = components_to_quantize
|
||||
self.quant_mapping = quant_mapping
|
||||
self.config_mapping = {} # book-keeping Example: `{module_name: quant_config}`
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from typing import Union
|
||||
|
||||
from ..utils import get_logger
|
||||
from .import_utils import is_kernels_available
|
||||
|
||||
@@ -21,3 +23,42 @@ def _get_fa3_from_hub():
|
||||
except Exception as e:
|
||||
logger.error(f"An error occurred while fetching kernel '{_DEFAULT_HUB_ID_FA3}' from the Hub: {e}")
|
||||
raise
|
||||
|
||||
|
||||
# Optional integration with the `kernels` package: when `is_kernels_available()`
# is true, import the real API and register the default mapping at import time;
# otherwise define stand-ins under the same names so this module still imports.
if is_kernels_available():
    from kernels import (
        Device,
        LayerRepository,
        register_kernel_mapping,
        replace_kernel_forward_from_hub,
        use_kernel_forward_from_hub,
    )

    # Layer name -> {device -> LayerRepository}. Currently only "RMSNorm" on
    # "cuda", pointing at `LigerRMSNorm` in `kernels-community/liger_kernels`.
    _KERNEL_MAPPING: dict[str, dict[Union[Device, str], LayerRepository]] = {
        "RMSNorm": {
            "cuda": LayerRepository(repo_id="kernels-community/liger_kernels", layer_name="LigerRMSNorm"),
        },
    }

    # Runs once, as a side effect of importing this module.
    register_kernel_mapping(_KERNEL_MAPPING)

else:
    # Stubs so that decorators keep working when `kernels` is not installed.
    # NOTE(review): the original comment referred to decorators "in transformers";
    # presumably carried over from that library — confirm the intended wording.

    # No-op decorator factory: ignores its arguments and returns a decorator
    # that hands the decorated class back unchanged.
    def use_kernel_forward_from_hub(*args, **kwargs):
        def decorator(cls):
            return cls

        return decorator

    # Placeholder type: constructing it always raises RuntimeError.
    class LayerRepository:
        def __init__(self, *args, **kwargs):
            raise RuntimeError("LayerRepository requires `kernels` to be installed. Run `pip install kernels`.")

    # Always raises RuntimeError; there is nothing to replace without `kernels`.
    def replace_kernel_forward_from_hub(*args, **kwargs):
        raise RuntimeError(
            "replace_kernel_forward_from_hub requires `kernels` to be installed. Run `pip install kernels`."
        )

    # Always raises RuntimeError; not called in this branch by this module.
    def register_kernel_mapping(*args, **kwargs):
        raise RuntimeError("register_kernel_mapping requires `kernels` to be installed. Run `pip install kernels`.")
|
||||
|
||||
@@ -9,6 +9,7 @@ from typing import Any, Callable, Dict, Union
|
||||
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
import pytest
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from huggingface_hub import ModelCard, delete_repo
|
||||
@@ -2362,6 +2363,73 @@ class PipelineTesterMixin:
|
||||
max_diff = np.abs(to_np(out) - to_np(loaded_out)).max()
|
||||
self.assertLess(max_diff, expected_max_difference)
|
||||
|
||||
@require_torch_accelerator
def test_pipeline_level_group_offloading_sanity_checks(self):
    """Check device placement after enabling pipeline-level group offloading.

    The alphabetically first `torch.nn.Module` component is passed as
    `exclude_modules`; its device type must match `torch_device`. Every other
    module component must have its first parameter on the offload device type.
    The test is skipped when any component exposes a falsy
    `_supports_group_offloading` attribute.
    """
    components = self.get_dummy_components()
    pipe: DiffusionPipeline = self.pipeline_class(**components)

    # Components without the attribute are treated as supporting offloading.
    for component in pipe.components.values():
        if not getattr(component, "_supports_group_offloading", True):
            pytest.skip(f"{self.pipeline_class.__name__} is not suitable for this test.")

    module_names = sorted(
        name for name, component in pipe.components.items() if isinstance(component, torch.nn.Module)
    )
    exclude_module_name = module_names[0]
    offload_device = "cpu"
    pipe.enable_group_offload(
        onload_device=torch_device,
        offload_device=offload_device,
        offload_type="leaf_level",
        exclude_modules=exclude_module_name,
    )
    excluded_module = getattr(pipe, exclude_module_name)
    # assertEqual (rather than assertTrue on `==`) reports both device types on failure.
    self.assertEqual(
        torch.device(excluded_module.device).type,
        torch.device(torch_device).type,
        f"Excluded module `{exclude_module_name}` is not on the onload device.",
    )

    for name, component in pipe.components.items():
        if name == exclude_module_name or not isinstance(component, torch.nn.Module):
            continue
        # `component.device` prints the `onload_device` type. We should probably override the
        # `device` property in `ModelMixin`.
        # Read the device straight from the first parameter; indexing into the
        # parameter is unnecessary and fails for 0-dim parameters.
        component_device = next(component.parameters()).device
        self.assertEqual(
            torch.device(component_device).type,
            torch.device(offload_device).type,
            f"Component `{name}` is not on the offload device.",
        )
|
||||
|
||||
@require_torch_accelerator
def test_pipeline_level_group_offloading_inference(self, expected_max_difference=1e-4):
    """Compare plain inference against inference with leaf-level group offloading.

    Both runs use the same dummy components and the same inputs, with the
    generator re-seeded to 0 before each call. The maximum absolute difference
    between the two outputs must be below `expected_max_difference`. The test
    is skipped when any component exposes a falsy `_supports_group_offloading`.
    """
    components = self.get_dummy_components()
    baseline_pipe: DiffusionPipeline = self.pipeline_class(**components)

    # A missing attribute counts as "supported"; only an explicit falsy value skips.
    for component in baseline_pipe.components.values():
        if not getattr(component, "_supports_group_offloading", True):
            pytest.skip(f"{self.pipeline_class.__name__} is not suitable for this test.")

    # Reference run: whole pipeline on the accelerator.
    baseline_pipe = baseline_pipe.to(torch_device)
    baseline_pipe.set_progress_bar_config(disable=None)
    torch.manual_seed(0)
    inputs = self.get_dummy_inputs(torch_device)
    inputs["generator"] = torch.manual_seed(0)
    reference_output = baseline_pipe(**inputs)[0]

    # Move the shared components back to CPU before rebuilding the pipeline.
    baseline_pipe.to("cpu")
    del baseline_pipe

    # Second run: same components, group offloading enabled at pipeline level.
    offloaded_pipe: DiffusionPipeline = self.pipeline_class(**components)
    offload_device = "cpu"
    offloaded_pipe.enable_group_offload(
        onload_device=torch_device,
        offload_device=offload_device,
        offload_type="leaf_level",
    )
    offloaded_pipe.set_progress_bar_config(disable=None)
    inputs["generator"] = torch.manual_seed(0)
    offloaded_output = offloaded_pipe(**inputs)[0]

    largest_gap = np.abs(to_np(reference_output) - to_np(offloaded_output)).max()
    self.assertLess(largest_gap, expected_max_difference)
|
||||
|
||||
|
||||
@is_staging_test
|
||||
class PipelinePushToHubTester(unittest.TestCase):
|
||||
|
||||
@@ -299,3 +299,19 @@ transformer BitsAndBytesConfig {
|
||||
data = json.loads(json_part)
|
||||
|
||||
return data
|
||||
|
||||
def test_single_component_to_quantize(self):
    """`components_to_quantize` given as a single string quantizes that component.

    Loads the pipeline with a `PipelineQuantizationConfig` whose
    `components_to_quantize` is the plain string "transformer" and checks that
    this component's config carries a `quantization_config` attribute.
    """
    component_to_quantize = "transformer"
    quant_config = PipelineQuantizationConfig(
        quant_backend="bitsandbytes_8bit",
        quant_kwargs={"load_in_8bit": True},
        components_to_quantize=component_to_quantize,
    )
    pipe = DiffusionPipeline.from_pretrained(
        self.model_name,
        quantization_config=quant_config,
        torch_dtype=torch.bfloat16,
    )

    # Look the component up directly: a loop that only asserts on a name match
    # would pass without checking anything if the component were missing.
    pipeline_components = pipe.components
    self.assertIn(component_to_quantize, pipeline_components)
    self.assertTrue(hasattr(pipeline_components[component_to_quantize].config, "quantization_config"))
|
||||
|
||||
@@ -69,3 +69,11 @@ class FluxTransformer2DModelSingleFileTests(unittest.TestCase):
|
||||
del model
|
||||
gc.collect()
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
def test_device_map_cuda(self):
    """Smoke test: loading the single-file checkpoint with `device_map="cuda"` must not raise."""
    # Start from an empty accelerator cache.
    backend_empty_cache(torch_device)
    loaded_model = self.model_class.from_single_file(self.ckpt_path, device_map="cuda")

    # Drop the model and reclaim memory so later tests are unaffected.
    del loaded_model
    gc.collect()
    backend_empty_cache(torch_device)
|
||||
|
||||
Reference in New Issue
Block a user