Compare commits
36 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e1f502fac1 | |||
| 9150ab02f6 | |||
| 93f71d95a2 | |||
| de4ba0a977 | |||
| 40f12d2aea | |||
| 1e0856616a | |||
| fa926e78f5 | |||
| a8c5801e26 | |||
| 2743c9ee3b | |||
| 2c47a2ffd4 | |||
| 772c32e433 | |||
| 4256de9fea | |||
| 6c0c72de9c | |||
| c8afd1c8b4 | |||
| 02fd92e38e | |||
| fc337d5853 | |||
| 32798bf242 | |||
| c2e5ece08b | |||
| 764b62473a | |||
| 6682956333 | |||
| ffc8c0c1e1 | |||
| 4acbfbf13b | |||
| 6549b04ec6 | |||
| 130fd8df54 | |||
| bcd4d77ba6 | |||
| 006d092751 | |||
| 9e4a75b142 | |||
| 0ff1aa910c | |||
| 901da9dccc | |||
| 67ffa7031e | |||
| 827fad66a0 | |||
| 9b721db205 | |||
| ba0e732eb0 | |||
| b2da59b197 | |||
| 7aa6af1138 | |||
| 87b800e154 |
@@ -340,6 +340,9 @@ jobs:
|
||||
- backend: "optimum_quanto"
|
||||
test_location: "quanto"
|
||||
additional_deps: []
|
||||
- backend: "nvidia_modelopt"
|
||||
test_location: "modelopt"
|
||||
additional_deps: []
|
||||
runs-on:
|
||||
group: aws-g6e-xlarge-plus
|
||||
container:
|
||||
|
||||
@@ -24,12 +24,14 @@
|
||||
title: Reproducibility
|
||||
- local: using-diffusers/schedulers
|
||||
title: Load schedulers and models
|
||||
- local: using-diffusers/models
|
||||
title: Models
|
||||
- local: using-diffusers/scheduler_features
|
||||
title: Scheduler features
|
||||
- local: using-diffusers/other-formats
|
||||
title: Model files and layouts
|
||||
- local: using-diffusers/push_to_hub
|
||||
title: Push files to the Hub
|
||||
title: Sharing pipelines and models
|
||||
|
||||
- title: Adapters
|
||||
isExpanded: false
|
||||
@@ -58,12 +60,6 @@
|
||||
title: Batch inference
|
||||
- local: training/distributed_inference
|
||||
title: Distributed inference
|
||||
- local: using-diffusers/scheduler_features
|
||||
title: Scheduler features
|
||||
- local: using-diffusers/callback
|
||||
title: Pipeline callbacks
|
||||
- local: using-diffusers/image_quality
|
||||
title: Controlling image quality
|
||||
|
||||
- title: Inference optimization
|
||||
isExpanded: false
|
||||
@@ -92,6 +88,8 @@
|
||||
title: xDiT
|
||||
- local: optimization/para_attn
|
||||
title: ParaAttention
|
||||
- local: using-diffusers/image_quality
|
||||
title: FreeU
|
||||
|
||||
- title: Hybrid Inference
|
||||
isExpanded: false
|
||||
@@ -188,6 +186,8 @@
|
||||
title: torchao
|
||||
- local: quantization/quanto
|
||||
title: quanto
|
||||
- local: quantization/modelopt
|
||||
title: NVIDIA ModelOpt
|
||||
|
||||
- title: Model accelerators and hardware
|
||||
isExpanded: false
|
||||
|
||||
@@ -120,6 +120,12 @@ The `guidance_scale` parameter in the pipeline is there to support future guidan
|
||||
- all
|
||||
- __call__
|
||||
|
||||
## QwenImageEditInpaintPipeline
|
||||
|
||||
[[autodoc]] QwenImageEditInpaintPipeline
|
||||
- all
|
||||
- __call__
|
||||
|
||||
## QwenImageControlNetPipeline
|
||||
- all
|
||||
- __call__
|
||||
|
||||
@@ -51,10 +51,10 @@ t2i_pipeline = t2i_blocks.init_pipeline(modular_repo_id, components_manager=comp
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
Components are only loaded and registered when using [`~ModularPipeline.load_components`] or [`~ModularPipeline.load_default_components`]. The example below uses [`~ModularPipeline.load_default_components`] to create a second pipeline that reuses all the components from the first one, and assigns it to a different collection
|
||||
Components are only loaded and registered when using [`~ModularPipeline.load_components`]. The example below uses [`~ModularPipeline.load_components`] to create a second pipeline that reuses all the components from the first one, and assigns it to a different collection.
|
||||
|
||||
```py
|
||||
pipe.load_default_components()
|
||||
pipe.load_components()
|
||||
pipe2 = ModularPipeline.from_pretrained("YiYiXu/modular-demo-auto", components_manager=comp, collection="test2")
|
||||
```
|
||||
|
||||
@@ -187,4 +187,4 @@ comp.enable_auto_cpu_offload(device="cuda")
|
||||
|
||||
All models begin on the CPU and [`ComponentsManager`] moves them to the appropriate device right before they're needed, and moves other models back to the CPU when GPU memory is low.
|
||||
|
||||
You can set your own rules for which models to offload first.
|
||||
You can set your own rules for which models to offload first.
|
||||
|
||||
@@ -75,13 +75,13 @@ Guiders that are already saved on the Hub with a `modular_model_index.json` file
|
||||
}
|
||||
```
|
||||
|
||||
The guider is only created after calling [`~ModularPipeline.load_default_components`] based on the loading specification in `modular_model_index.json`.
|
||||
The guider is only created after calling [`~ModularPipeline.load_components`] based on the loading specification in `modular_model_index.json`.
|
||||
|
||||
```py
|
||||
t2i_pipeline = t2i_blocks.init_pipeline("YiYiXu/modular-doc-guider")
|
||||
# not created during init
|
||||
assert t2i_pipeline.guider is None
|
||||
t2i_pipeline.load_default_components()
|
||||
t2i_pipeline.load_components()
|
||||
# loaded as PAG guider
|
||||
t2i_pipeline.guider
|
||||
```
|
||||
@@ -172,4 +172,4 @@ t2i_pipeline.push_to_hub("YiYiXu/modular-doc-guider")
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
</hfoptions>
|
||||
|
||||
@@ -29,7 +29,7 @@ blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS)
|
||||
modular_repo_id = "YiYiXu/modular-loader-t2i-0704"
|
||||
pipeline = blocks.init_pipeline(modular_repo_id)
|
||||
|
||||
pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
pipeline.load_components(torch_dtype=torch.float16)
|
||||
pipeline.to("cuda")
|
||||
|
||||
image = pipeline(prompt="Astronaut in a jungle, cold color palette, muted colors, detailed, 8k", output="images")[0]
|
||||
@@ -49,7 +49,7 @@ blocks = SequentialPipelineBlocks.from_blocks_dict(IMAGE2IMAGE_BLOCKS)
|
||||
modular_repo_id = "YiYiXu/modular-loader-t2i-0704"
|
||||
pipeline = blocks.init_pipeline(modular_repo_id)
|
||||
|
||||
pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
pipeline.load_components(torch_dtype=torch.float16)
|
||||
pipeline.to("cuda")
|
||||
|
||||
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-text2img.png"
|
||||
@@ -73,7 +73,7 @@ blocks = SequentialPipelineBlocks.from_blocks_dict(INPAINT_BLOCKS)
|
||||
modular_repo_id = "YiYiXu/modular-loader-t2i-0704"
|
||||
pipeline = blocks.init_pipeline(modular_repo_id)
|
||||
|
||||
pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
pipeline.load_components(torch_dtype=torch.float16)
|
||||
pipeline.to("cuda")
|
||||
|
||||
img_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-text2img.png"
|
||||
@@ -176,15 +176,15 @@ diffdiff_pipeline = ModularPipeline.from_pretrained(modular_repo_id, trust_remot
|
||||
|
||||
## Loading components
|
||||
|
||||
A [`ModularPipeline`] doesn't automatically instantiate with components. It only loads the configuration and component specifications. You can load all components with [`~ModularPipeline.load_default_components`] or only load specific components with [`~ModularPipeline.load_components`].
|
||||
A [`ModularPipeline`] doesn't automatically instantiate with components. It only loads the configuration and component specifications. You can load all components with [`~ModularPipeline.load_components`], or only load specific components by passing their names to the same method.
|
||||
|
||||
<hfoptions id="load">
|
||||
<hfoption id="load_default_components">
|
||||
<hfoption id="load_components">
|
||||
|
||||
```py
|
||||
import torch
|
||||
|
||||
t2i_pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
t2i_pipeline.load_components(torch_dtype=torch.float16)
|
||||
t2i_pipeline.to("cuda")
|
||||
```
|
||||
|
||||
@@ -355,4 +355,4 @@ The [config.json](https://huggingface.co/YiYiXu/modular-diffdiff-0704/blob/main/
|
||||
"ModularPipelineBlocks": "block.DiffDiffBlocks"
|
||||
}
|
||||
}
|
||||
```
|
||||
```
|
||||
|
||||
@@ -173,9 +173,9 @@ print(dd_blocks)
|
||||
|
||||
## ModularPipeline
|
||||
|
||||
Convert the [`SequentialPipelineBlocks`] into a [`ModularPipeline`] with the [`ModularPipeline.init_pipeline`] method. This initializes the expected components to load from a `modular_model_index.json` file. Explicitly load the components by calling [`ModularPipeline.load_default_components`].
|
||||
Convert the [`SequentialPipelineBlocks`] into a [`ModularPipeline`] with the [`ModularPipeline.init_pipeline`] method. This initializes the expected components to load from a `modular_model_index.json` file. Explicitly load the components by calling [`ModularPipeline.load_components`].
|
||||
|
||||
It is a good idea to initialize the [`ComponentManager`] with the pipeline to help manage the different components. Once you call [`~ModularPipeline.load_default_components`], the components are registered to the [`ComponentManager`] and can be shared between workflows. The example below uses the `collection` argument to assign the components a `"diffdiff"` label for better organization.
|
||||
It is a good idea to initialize the [`ComponentsManager`] with the pipeline to help manage the different components. Once you call [`~ModularPipeline.load_components`], the components are registered to the [`ComponentsManager`] and can be shared between workflows. The example below uses the `collection` argument to assign the components a `"diffdiff"` label for better organization.
|
||||
|
||||
```py
|
||||
from diffusers.modular_pipelines import ComponentsManager
|
||||
@@ -209,11 +209,11 @@ Use the [`sub_blocks.insert`] method to insert it into the [`ModularPipeline`].
|
||||
dd_blocks.sub_blocks.insert("ip_adapter", ip_adapter_block, 0)
|
||||
```
|
||||
|
||||
Call [`~ModularPipeline.init_pipeline`] to initialize a [`ModularPipeline`] and use [`~ModularPipeline.load_default_components`] to load the model components. Load and set the IP-Adapter to run the pipeline.
|
||||
Call [`~ModularPipeline.init_pipeline`] to initialize a [`ModularPipeline`] and use [`~ModularPipeline.load_components`] to load the model components. Load and set the IP-Adapter to run the pipeline.
|
||||
|
||||
```py
|
||||
dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff")
|
||||
dd_pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
dd_pipeline.load_components(torch_dtype=torch.float16)
|
||||
dd_pipeline.loader.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
|
||||
dd_pipeline.loader.set_ip_adapter_scale(0.6)
|
||||
dd_pipeline = dd_pipeline.to(device)
|
||||
@@ -260,14 +260,14 @@ class SDXLDiffDiffControlNetDenoiseStep(StableDiffusionXLDenoiseLoopWrapper):
|
||||
controlnet_denoise_block = SDXLDiffDiffControlNetDenoiseStep()
|
||||
```
|
||||
|
||||
Insert the `controlnet_input` block and replace the `denoise` block with the new `controlnet_denoise_block`. Initialize a [`ModularPipeline`] and [`~ModularPipeline.load_default_components`] into it.
|
||||
Insert the `controlnet_input` block and replace the `denoise` block with the new `controlnet_denoise_block`. Initialize a [`ModularPipeline`] and [`~ModularPipeline.load_components`] into it.
|
||||
|
||||
```py
|
||||
dd_blocks.sub_blocks.insert("controlnet_input", control_input_block, 7)
|
||||
dd_blocks.sub_blocks["denoise"] = controlnet_denoise_block
|
||||
|
||||
dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff")
|
||||
dd_pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
dd_pipeline.load_components(torch_dtype=torch.float16)
|
||||
dd_pipeline = dd_pipeline.to(device)
|
||||
|
||||
control_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/diffdiff_tomato_canny.jpeg")
|
||||
@@ -320,7 +320,7 @@ Call [`SequentialPipelineBlocks.from_blocks_dict`] to create a [`SequentialPipel
|
||||
```py
|
||||
dd_auto_blocks = SequentialPipelineBlocks.from_blocks_dict(DIFFDIFF_AUTO_BLOCKS)
|
||||
dd_pipeline = dd_auto_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff")
|
||||
dd_pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
dd_pipeline.load_components(torch_dtype=torch.float16)
|
||||
```
|
||||
|
||||
## Share
|
||||
@@ -340,5 +340,5 @@ from diffusers.modular_pipelines import ModularPipeline, ComponentsManager
|
||||
components = ComponentsManager()
|
||||
|
||||
diffdiff_pipeline = ModularPipeline.from_pretrained("YiYiXu/modular-diffdiff-0704", trust_remote_code=True, components_manager=components, collection="diffdiff")
|
||||
diffdiff_pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
```
|
||||
diffdiff_pipeline.load_components(torch_dtype=torch.float16)
|
||||
```
|
||||
|
||||
@@ -0,0 +1,141 @@
|
||||
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License. -->
|
||||
|
||||
# NVIDIA ModelOpt
|
||||
|
||||
[NVIDIA-ModelOpt](https://github.com/NVIDIA/TensorRT-Model-Optimizer) is a unified library of state-of-the-art model optimization techniques like quantization, pruning, distillation, speculative decoding, etc. It compresses deep learning models for downstream deployment frameworks like TensorRT-LLM or TensorRT to optimize inference speed.
|
||||
|
||||
Before you begin, make sure you have nvidia_modelopt installed.
|
||||
|
||||
```bash
|
||||
pip install -U "nvidia_modelopt[hf]"
|
||||
```
|
||||
|
||||
Quantize a model by passing [`NVIDIAModelOptConfig`] to [`~ModelMixin.from_pretrained`] (you can also load pre-quantized models). This works for any model in any modality, as long as it supports loading with [Accelerate](https://hf.co/docs/accelerate/index) and contains `torch.nn.Linear` layers.
|
||||
|
||||
The example below only quantizes the weights to FP8.
|
||||
|
||||
```python
|
||||
import torch
|
||||
from diffusers import AutoModel, SanaPipeline, NVIDIAModelOptConfig
|
||||
|
||||
model_id = "Efficient-Large-Model/Sana_600M_1024px_diffusers"
|
||||
dtype = torch.bfloat16
|
||||
|
||||
quantization_config = NVIDIAModelOptConfig(quant_type="FP8", quant_method="modelopt")
|
||||
transformer = AutoModel.from_pretrained(
|
||||
model_id,
|
||||
subfolder="transformer",
|
||||
quantization_config=quantization_config,
|
||||
torch_dtype=dtype,
|
||||
)
|
||||
pipe = SanaPipeline.from_pretrained(
|
||||
model_id,
|
||||
transformer=transformer,
|
||||
torch_dtype=dtype,
|
||||
)
|
||||
pipe.to("cuda")
|
||||
|
||||
print(f"Pipeline memory usage: {torch.cuda.max_memory_reserved() / 1024**3:.3f} GB")
|
||||
|
||||
prompt = "A cat holding a sign that says hello world"
|
||||
image = pipe(
|
||||
prompt, num_inference_steps=50, guidance_scale=4.5, max_sequence_length=512
|
||||
).images[0]
|
||||
image.save("output.png")
|
||||
```
|
||||
|
||||
> **Note:**
|
||||
>
|
||||
> The quantization methods in NVIDIA-ModelOpt are designed to reduce the memory footprint of model weights using various QAT (Quantization-Aware Training) and PTQ (Post-Training Quantization) techniques while maintaining model performance. However, the actual performance gain during inference depends on the deployment framework (e.g., TRT-LLM, TensorRT) and the specific hardware configuration.
|
||||
>
|
||||
> More details can be found [here](https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/examples).
|
||||
|
||||
## NVIDIAModelOptConfig
|
||||
|
||||
The `NVIDIAModelOptConfig` class accepts the following parameters:
|
||||
- `quant_type`: A string value mentioning one of the quantization types below.
|
||||
- `modules_to_not_convert`: A list of full or partial module names for which quantization should not be performed. For example, to not perform any quantization of the [`SD3Transformer2DModel`]'s pos_embed projection blocks, one would specify: `modules_to_not_convert=["pos_embed.proj.weight"]`.
|
||||
- `disable_conv_quantization`: A boolean value which when set to `True` disables quantization for all convolutional layers in the model. This is useful as channel and block quantization generally don't work well with convolutional layers (used with INT4, NF4, NVFP4). If you want to disable quantization for specific convolutional layers, use `modules_to_not_convert` instead.
|
||||
- `algorithm`: The algorithm to use for determining scale, defaults to `"max"`. You can check modelopt documentation for more algorithms and details.
|
||||
- `forward_loop`: The forward loop function to use for calibrating activation during quantization. If not provided, it relies on static scale values computed using the weights only.
|
||||
- `kwargs`: A dict of keyword arguments to pass to the underlying quantization method which will be invoked based on `quant_type`.
|
||||
|
||||
## Supported quantization types
|
||||
|
||||
ModelOpt supports weight-only, channel, and block quantization for int8, fp8, int4, nf4, and nvfp4. The quantization methods are designed to reduce the memory footprint of the model weights while maintaining the performance of the model during inference.
|
||||
|
||||
Weight-only quantization stores the model weights in a specific low-bit data type but performs computation with a higher-precision data type, like `bfloat16`. This lowers the memory requirements from model weights but retains the memory peaks for activation computation.
|
||||
|
||||
The quantization methods supported are as follows:
|
||||
|
||||
| **Quantization Type** | **Supported Schemes** | **Required Kwargs** | **Additional Notes** |
|
||||
|-----------------------|-----------------------|---------------------|----------------------|
|
||||
| **INT8** | `int8 weight only`, `int8 channel quantization`, `int8 block quantization` | `quant_type`, `quant_type + channel_quantize`, `quant_type + channel_quantize + block_quantize` | |
|
||||
| **FP8** | `fp8 weight only`, `fp8 channel quantization`, `fp8 block quantization` | `quant_type`, `quant_type + channel_quantize`, `quant_type + channel_quantize + block_quantize` | |
|
||||
| **INT4** | `int4 weight only`, `int4 block quantization` | `quant_type`, `quant_type + channel_quantize + block_quantize` | `channel_quantize = -1 is only supported for now`|
|
||||
| **NF4** | `nf4 weight only`, `nf4 double block quantization` | `quant_type`, `quant_type + channel_quantize + block_quantize + scale_channel_quantize + scale_block_quantize` | `channel_quantize = -1 and scale_channel_quantize = -1 are only supported for now` |
|
||||
| **NVFP4** | `nvfp4 weight only`, `nvfp4 block quantization` | `quant_type`, `quant_type + channel_quantize + block_quantize` | `channel_quantize = -1 is only supported for now`|
|
||||
|
||||
|
||||
Refer to the [official modelopt documentation](https://nvidia.github.io/TensorRT-Model-Optimizer/) for a better understanding of the available quantization methods and the exhaustive list of configuration options available.
|
||||
|
||||
## Serializing and Deserializing quantized models
|
||||
|
||||
To serialize a quantized model in a given dtype, first load the model with the desired quantization dtype and then save it using the [`~ModelMixin.save_pretrained`] method.
|
||||
|
||||
```python
|
||||
import torch
|
||||
from diffusers import AutoModel, NVIDIAModelOptConfig
|
||||
from modelopt.torch.opt import enable_huggingface_checkpointing
|
||||
|
||||
enable_huggingface_checkpointing()
|
||||
|
||||
model_id = "Efficient-Large-Model/Sana_600M_1024px_diffusers"
|
||||
quant_config_fp8 = {"quant_type": "FP8", "quant_method": "modelopt"}
|
||||
quant_config_fp8 = NVIDIAModelOptConfig(**quant_config_fp8)
|
||||
model = AutoModel.from_pretrained(
|
||||
model_id,
|
||||
subfolder="transformer",
|
||||
quantization_config=quant_config_fp8,
|
||||
torch_dtype=torch.bfloat16,
|
||||
)
|
||||
model.save_pretrained('path/to/sana_fp8', safe_serialization=False)
|
||||
```
|
||||
|
||||
To load a serialized quantized model, use the [`~ModelMixin.from_pretrained`] method.
|
||||
|
||||
```python
|
||||
import torch
|
||||
from diffusers import AutoModel, NVIDIAModelOptConfig, SanaPipeline
|
||||
from modelopt.torch.opt import enable_huggingface_checkpointing
|
||||
|
||||
enable_huggingface_checkpointing()
|
||||
|
||||
quantization_config = NVIDIAModelOptConfig(quant_type="FP8", quant_method="modelopt")
|
||||
transformer = AutoModel.from_pretrained(
|
||||
"path/to/sana_fp8",
|
||||
subfolder="transformer",
|
||||
quantization_config=quantization_config,
|
||||
torch_dtype=torch.bfloat16,
|
||||
)
|
||||
pipe = SanaPipeline.from_pretrained(
|
||||
"Efficient-Large-Model/Sana_600M_1024px_diffusers",
|
||||
transformer=transformer,
|
||||
torch_dtype=torch.bfloat16,
|
||||
)
|
||||
pipe.to("cuda")
|
||||
prompt = "A cat holding a sign that says hello world"
|
||||
image = pipe(
|
||||
prompt, num_inference_steps=50, guidance_scale=4.5, max_sequence_length=512
|
||||
).images[0]
|
||||
image.save("output.png")
|
||||
```
|
||||
@@ -223,7 +223,7 @@ from diffusers.image_processor import VaeImageProcessor
|
||||
import torch
|
||||
|
||||
vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=torch.bfloat16).to("cuda")
|
||||
vae_scale_factor = 2 ** (len(vae.config.block_out_channels))
|
||||
vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)
|
||||
image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)
|
||||
|
||||
with torch.no_grad():
|
||||
|
||||
@@ -12,112 +12,56 @@ specific language governing permissions and limitations under the License.
|
||||
|
||||
# AutoPipeline
|
||||
|
||||
Diffusers provides many pipelines for basic tasks like generating images, videos, audio, and inpainting. On top of these, there are specialized pipelines for adapters and features like upscaling, super-resolution, and more. Different pipeline classes can even use the same checkpoint because they share the same pretrained model! With so many different pipelines, it can be overwhelming to know which pipeline class to use.
|
||||
[AutoPipeline](../api/pipelines/auto_pipeline) is a *task-and-model* pipeline that automatically selects the correct pipeline subclass based on the task. It handles the complexity of loading different pipeline subclasses without needing to know the specific pipeline subclass name.
|
||||
|
||||
The [AutoPipeline](../api/pipelines/auto_pipeline) class is designed to simplify the variety of pipelines in Diffusers. It is a generic *task-first* pipeline that lets you focus on a task ([`AutoPipelineForText2Image`], [`AutoPipelineForImage2Image`], and [`AutoPipelineForInpainting`]) without needing to know the specific pipeline class. The [AutoPipeline](../api/pipelines/auto_pipeline) automatically detects the correct pipeline class to use.
|
||||
This is unlike [`DiffusionPipeline`], a *model-only* pipeline that automatically selects the pipeline subclass based on the model.
|
||||
|
||||
For example, let's use the [dreamlike-art/dreamlike-photoreal-2.0](https://hf.co/dreamlike-art/dreamlike-photoreal-2.0) checkpoint.
|
||||
|
||||
Under the hood, [AutoPipeline](../api/pipelines/auto_pipeline):
|
||||
|
||||
1. Detects a `"stable-diffusion"` class from the [model_index.json](https://hf.co/dreamlike-art/dreamlike-photoreal-2.0/blob/main/model_index.json) file.
|
||||
2. Depending on the task you're interested in, it loads the [`StableDiffusionPipeline`], [`StableDiffusionImg2ImgPipeline`], or [`StableDiffusionInpaintPipeline`]. Any parameter (`strength`, `num_inference_steps`, etc.) you would pass to these specific pipelines can also be passed to the [AutoPipeline](../api/pipelines/auto_pipeline).
|
||||
|
||||
<hfoptions id="autopipeline">
|
||||
<hfoption id="text-to-image">
|
||||
[`AutoPipelineForImage2Image`] returns a specific pipeline subclass (for example, [`StableDiffusionXLImg2ImgPipeline`]), which can only be used for image-to-image tasks.
|
||||
|
||||
```py
|
||||
from diffusers import AutoPipelineForText2Image
|
||||
import torch
|
||||
|
||||
pipe_txt2img = AutoPipelineForText2Image.from_pretrained(
|
||||
"dreamlike-art/dreamlike-photoreal-2.0", torch_dtype=torch.float16, use_safetensors=True
|
||||
).to("cuda")
|
||||
|
||||
prompt = "cinematic photo of Godzilla eating sushi with a cat in a izakaya, 35mm photograph, film, professional, 4k, highly detailed"
|
||||
generator = torch.Generator(device="cpu").manual_seed(37)
|
||||
image = pipe_txt2img(prompt, generator=generator).images[0]
|
||||
image
|
||||
```
|
||||
|
||||
<div class="flex justify-center">
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/autopipeline-text2img.png"/>
|
||||
</div>
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="image-to-image">
|
||||
|
||||
```py
|
||||
from diffusers import AutoPipelineForImage2Image
|
||||
from diffusers.utils import load_image
|
||||
import torch
|
||||
|
||||
pipe_img2img = AutoPipelineForImage2Image.from_pretrained(
|
||||
"dreamlike-art/dreamlike-photoreal-2.0", torch_dtype=torch.float16, use_safetensors=True
|
||||
).to("cuda")
|
||||
|
||||
init_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/autopipeline-text2img.png")
|
||||
|
||||
prompt = "cinematic photo of Godzilla eating burgers with a cat in a fast food restaurant, 35mm photograph, film, professional, 4k, highly detailed"
|
||||
generator = torch.Generator(device="cpu").manual_seed(53)
|
||||
image = pipe_img2img(prompt, image=init_image, generator=generator).images[0]
|
||||
image
|
||||
```
|
||||
|
||||
Notice how the [dreamlike-art/dreamlike-photoreal-2.0](https://hf.co/dreamlike-art/dreamlike-photoreal-2.0) checkpoint is used for both text-to-image and image-to-image tasks? To save memory and avoid loading the checkpoint twice, use the [`~DiffusionPipeline.from_pipe`] method.
|
||||
|
||||
```py
|
||||
pipe_img2img = AutoPipelineForImage2Image.from_pipe(pipe_txt2img).to("cuda")
|
||||
image = pipeline(prompt, image=init_image, generator=generator).images[0]
|
||||
image
|
||||
```
|
||||
|
||||
You can learn more about the [`~DiffusionPipeline.from_pipe`] method in the [Reuse a pipeline](../using-diffusers/loading#reuse-a-pipeline) guide.
|
||||
|
||||
<div class="flex justify-center">
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/autopipeline-img2img.png"/>
|
||||
</div>
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="inpainting">
|
||||
|
||||
```py
|
||||
from diffusers import AutoPipelineForInpainting
|
||||
from diffusers.utils import load_image
|
||||
import torch
|
||||
|
||||
pipeline = AutoPipelineForInpainting.from_pretrained(
|
||||
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, use_safetensors=True
|
||||
).to("cuda")
|
||||
|
||||
init_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/autopipeline-img2img.png")
|
||||
mask_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/autopipeline-mask.png")
|
||||
|
||||
prompt = "cinematic photo of a owl, 35mm photograph, film, professional, 4k, highly detailed"
|
||||
generator = torch.Generator(device="cpu").manual_seed(38)
|
||||
image = pipeline(prompt, image=init_image, mask_image=mask_image, generator=generator, strength=0.4).images[0]
|
||||
image
|
||||
```
|
||||
|
||||
<div class="flex justify-center">
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/autopipeline-inpaint.png"/>
|
||||
</div>
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
## Unsupported checkpoints
|
||||
|
||||
The [AutoPipeline](../api/pipelines/auto_pipeline) supports [Stable Diffusion](../api/pipelines/stable_diffusion/overview), [Stable Diffusion XL](../api/pipelines/stable_diffusion/stable_diffusion_xl), [ControlNet](../api/pipelines/controlnet), [Kandinsky 2.1](../api/pipelines/kandinsky.md), [Kandinsky 2.2](../api/pipelines/kandinsky_v22), and [DeepFloyd IF](../api/pipelines/deepfloyd_if) checkpoints.
|
||||
|
||||
If you try to load an unsupported checkpoint, you'll get an error.
|
||||
|
||||
```py
|
||||
from diffusers import AutoPipelineForImage2Image
|
||||
import torch
|
||||
|
||||
pipeline = AutoPipelineForImage2Image.from_pretrained(
|
||||
"openai/shap-e-img2img", torch_dtype=torch.float16, use_safetensors=True
|
||||
"RunDiffusion/Juggernaut-XL-v9", torch_dtype=torch.bfloat16, device_map="cuda",
|
||||
)
|
||||
print(pipeline)
|
||||
"StableDiffusionXLImg2ImgPipeline {
|
||||
"_class_name": "StableDiffusionXLImg2ImgPipeline",
|
||||
...
|
||||
"
|
||||
```
|
||||
|
||||
Loading the same model with [`DiffusionPipeline`] returns the [`StableDiffusionXLPipeline`] subclass. It can be used for text-to-image, image-to-image, or inpainting tasks depending on the inputs.
|
||||
|
||||
```py
|
||||
import torch
|
||||
from diffusers import DiffusionPipeline
|
||||
|
||||
pipeline = DiffusionPipeline.from_pretrained(
|
||||
"RunDiffusion/Juggernaut-XL-v9", torch_dtype=torch.bfloat16, device_map="cuda",
|
||||
)
|
||||
print(pipeline)
|
||||
"StableDiffusionXLPipeline {
|
||||
"_class_name": "StableDiffusionXLPipeline",
|
||||
...
|
||||
"
|
||||
```
|
||||
|
||||
Check the [mappings](https://github.com/huggingface/diffusers/blob/130fd8df54f24ffb006d84787b598d8adc899f23/src/diffusers/pipelines/auto_pipeline.py#L114) to see whether a model is supported or not.
|
||||
|
||||
Trying to load an unsupported model returns an error.
|
||||
|
||||
```py
|
||||
import torch
|
||||
from diffusers import AutoPipelineForImage2Image
|
||||
|
||||
pipeline = AutoPipelineForImage2Image.from_pretrained(
|
||||
"openai/shap-e-img2img", torch_dtype=torch.float16,
|
||||
)
|
||||
"ValueError: AutoPipeline can't find a pipeline linked to ShapEImg2ImgPipeline for None"
|
||||
```
|
||||
|
||||
There are three types of [AutoPipeline](../api/pipelines/auto_pipeline) classes: [`AutoPipelineForText2Image`], [`AutoPipelineForImage2Image`], and [`AutoPipelineForInpainting`]. Each of these classes has a predefined mapping, linking a pipeline to its task-specific subclass.
|
||||
|
||||
When [`~AutoPipelineForText2Image.from_pretrained`] is called, it extracts the class name from the `model_index.json` file and selects the appropriate pipeline subclass for the task based on the mapping.
|
||||
@@ -10,13 +10,7 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
|
||||
specific language governing permissions and limitations under the License.
|
||||
-->
|
||||
|
||||
# Controlling image quality
|
||||
|
||||
The components of a diffusion model, like the UNet and scheduler, can be optimized to improve the quality of generated images leading to better details. These techniques are especially useful if you don't have the resources to simply use a larger model for inference. You can enable these techniques during inference without any additional training.
|
||||
|
||||
This guide will show you how to turn these techniques on in your pipeline and how to configure them to improve the quality of your generated images.
|
||||
|
||||
## Details
|
||||
# FreeU
|
||||
|
||||
[FreeU](https://hf.co/papers/2309.11497) improves image details by rebalancing the UNet's backbone and skip connection weights. The skip connections can cause the model to overlook some of the backbone semantics which may lead to unnatural image details in the generated image. This technique does not require any additional training and can be applied on the fly during inference for tasks like image-to-image and text-to-video.
|
||||
|
||||
@@ -139,7 +133,7 @@ export_to_video(video_frames, "teddy_bear.mp4", fps=10)
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
Call the [`pipelines.StableDiffusionMixin.disable_freeu`] method to disable FreeU.
|
||||
Call the [`~pipelines.StableDiffusionMixin.disable_freeu`] method to disable FreeU.
|
||||
|
||||
```py
|
||||
pipeline.disable_freeu()
|
||||
|
||||
@@ -108,23 +108,20 @@ print(pipeline.transformer.dtype, pipeline.vae.dtype)
|
||||
|
||||
The `device_map` argument determines individual model or pipeline placement on an accelerator like a GPU. It is especially helpful when there are multiple GPUs.
|
||||
|
||||
Diffusers currently provides three options to `device_map`, `"cuda"`, `"balanced"` and `"auto"`. Refer to the table below to compare the three placement strategies.
|
||||
A pipeline supports two options for `device_map`, `"cuda"` and `"balanced"`. Refer to the table below to compare the placement strategies.
|
||||
|
||||
| parameter | description |
|
||||
|---|---|
|
||||
| `"cuda"` | places model or pipeline on CUDA device |
|
||||
| `"balanced"` | evenly distributes model or pipeline on all GPUs |
|
||||
| `"auto"` | distribute model from fastest device first to slowest |
|
||||
| `"cuda"` | places pipeline on a supported accelerator device like CUDA |
|
||||
| `"balanced"` | evenly distributes pipeline on all GPUs |
|
||||
|
||||
Use the `max_memory` argument in [`~DiffusionPipeline.from_pretrained`] to allocate a maximum amount of memory to use on each device. By default, Diffusers uses the maximum amount available.
|
||||
|
||||
<hfoptions id="device_map">
|
||||
<hfoption id="pipeline">
|
||||
|
||||
```py
|
||||
import torch
|
||||
from diffusers import DiffusionPipeline
|
||||
|
||||
max_memory = {0: "16GB", 1: "16GB"}
|
||||
pipeline = DiffusionPipeline.from_pretrained(
|
||||
"Qwen/Qwen-Image",
|
||||
torch_dtype=torch.bfloat16,
|
||||
@@ -132,26 +129,6 @@ pipeline = DiffusionPipeline.from_pretrained(
|
||||
)
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="individual model">
|
||||
|
||||
```py
|
||||
import torch
|
||||
from diffusers import AutoModel
|
||||
|
||||
max_memory = {0: "16GB", 1: "16GB"}
|
||||
transformer = AutoModel.from_pretrained(
|
||||
"Qwen/Qwen-Image",
|
||||
subfolder="transformer",
|
||||
torch_dtype=torch.bfloat16
|
||||
device_map="cuda",
|
||||
max_memory=max_memory
|
||||
)
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
The `hf_device_map` attribute allows you to access and view the `device_map`.
|
||||
|
||||
```py
|
||||
@@ -189,22 +166,18 @@ pipeline = DiffusionPipeline.from_pretrained(
|
||||
|
||||
[`DiffusionPipeline`] is flexible and accommodates loading different models or schedulers. You can experiment with different schedulers to optimize for generation speed or quality, and you can replace models with more performant ones.
|
||||
|
||||
The example below swaps the default scheduler to generate higher quality images and a more stable VAE version. Pass the `subfolder` argument in [`~HeunDiscreteScheduler.from_pretrained`] to load the scheduler to the correct subfolder.
|
||||
The example below uses a more stable VAE version.
|
||||
|
||||
```py
|
||||
import torch
|
||||
from diffusers import DiffusionPipeline, HeunDiscreteScheduler, AutoModel
|
||||
from diffusers import DiffusionPipeline, AutoModel
|
||||
|
||||
scheduler = HeunDiscreteScheduler.from_pretrained(
|
||||
"stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
|
||||
)
|
||||
vae = AutoModel.from_pretrained(
|
||||
"madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
|
||||
)
|
||||
|
||||
pipeline = DiffusionPipeline.from_pretrained(
|
||||
"stabilityai/stable-diffusion-xl-base-1.0",
|
||||
scheduler=scheduler,
|
||||
vae=vae,
|
||||
torch_dtype=torch.float16,
|
||||
device_map="cuda"
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
-->
|
||||
|
||||
[[open-in-colab]]
|
||||
|
||||
# Models
|
||||
|
||||
A diffusion model relies on a few individual models working together to generate an output. These models are responsible for denoising, encoding inputs, and decoding latents into the actual outputs.
|
||||
|
||||
This guide will show you how to load models.
|
||||
|
||||
## Loading a model
|
||||
|
||||
All models are loaded with the [`~ModelMixin.from_pretrained`] method, which downloads and caches the latest model version. If the latest files are available in the local cache, [`~ModelMixin.from_pretrained`] reuses files in the cache.
|
||||
|
||||
Pass the `subfolder` argument to [`~ModelMixin.from_pretrained`] to specify where to load the model weights from. Omit the `subfolder` argument if the repository doesn't have a subfolder structure or if you're loading a standalone model.
|
||||
|
||||
```py
|
||||
from diffusers import QwenImageTransformer2DModel
|
||||
|
||||
model = QwenImageTransformer2DModel.from_pretrained("Qwen/Qwen-Image", subfolder="transformer")
|
||||
```
|
||||
|
||||
## AutoModel
|
||||
|
||||
[`AutoModel`] detects the model class from a `model_index.json` file or a model's `config.json` file. It fetches the correct model class from these files and delegates the actual loading to the model class. [`AutoModel`] is useful for automatic model type detection without needing to know the exact model class beforehand.
|
||||
|
||||
```py
|
||||
from diffusers import AutoModel
|
||||
|
||||
model = AutoModel.from_pretrained(
|
||||
"Qwen/Qwen-Image", subfolder="transformer"
|
||||
)
|
||||
```
|
||||
|
||||
## Model data types
|
||||
|
||||
Use the `torch_dtype` argument in [`~ModelMixin.from_pretrained`] to load a model with a specific data type. This allows you to load a model in a lower precision to reduce memory usage.
|
||||
|
||||
```py
|
||||
import torch
|
||||
from diffusers import QwenImageTransformer2DModel
|
||||
|
||||
model = QwenImageTransformer2DModel.from_pretrained(
|
||||
"Qwen/Qwen-Image",
|
||||
subfolder="transformer",
|
||||
torch_dtype=torch.bfloat16
|
||||
)
|
||||
```
|
||||
|
||||
[nn.Module.to](https://docs.pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.to) can also convert to a specific data type on the fly. However, it converts *all* weights to the requested data type, unlike `torch_dtype`, which respects `_keep_in_fp32_modules`. This attribute preserves layers in `torch.float32` for numerical stability and best generation quality (see example [_keep_in_fp32_modules](https://github.com/huggingface/diffusers/blob/f864a9a352fa4a220d860bfdd1782e3e5af96382/src/diffusers/models/transformers/transformer_wan.py#L374)).
|
||||
|
||||
```py
|
||||
import torch
|
||||
from diffusers import QwenImageTransformer2DModel
|
||||
|
||||
model = QwenImageTransformer2DModel.from_pretrained(
|
||||
"Qwen/Qwen-Image", subfolder="transformer"
|
||||
)
|
||||
model = model.to(dtype=torch.float16)
|
||||
```
|
||||
|
||||
## Device placement
|
||||
|
||||
Use the `device_map` argument in [`~ModelMixin.from_pretrained`] to place a model on an accelerator like a GPU. It is especially helpful when there are multiple GPUs.
|
||||
|
||||
Diffusers currently supports three `device_map` options for individual models: `"cuda"`, `"balanced"`, and `"auto"`. Refer to the table below to compare the three placement strategies.
|
||||
|
||||
| parameter | description |
|
||||
|---|---|
|
||||
| `"cuda"` | places model on a supported accelerator device like CUDA |
|
||||
| `"balanced"` | evenly distributes model on all GPUs |
|
||||
| `"auto"` | distributes model from fastest device first to slowest |
|
||||
|
||||
Use the `max_memory` argument in [`~ModelMixin.from_pretrained`] to allocate a maximum amount of memory to use on each device. By default, Diffusers uses the maximum amount available.
|
||||
|
||||
```py
|
||||
import torch
|
||||
from diffusers import QwenImageTransformer2DModel
|
||||
|
||||
max_memory = {0: "16GB", 1: "16GB"}
|
||||
transformer = QwenImageTransformer2DModel.from_pretrained(
|
||||
"Qwen/Qwen-Image",
|
||||
subfolder="transformer",
|
||||
torch_dtype=torch.bfloat16,
|
||||
device_map="cuda",
|
||||
max_memory=max_memory
|
||||
)
|
||||
```
|
||||
|
||||
The `hf_device_map` attribute allows you to access and view the `device_map`.
|
||||
|
||||
```py
|
||||
print(transformer.hf_device_map)
|
||||
# {'': device(type='cuda')}
|
||||
```
|
||||
|
||||
## Saving models
|
||||
|
||||
Save a model with the [`~ModelMixin.save_pretrained`] method.
|
||||
|
||||
```py
|
||||
from diffusers import QwenImageTransformer2DModel
|
||||
|
||||
model = QwenImageTransformer2DModel.from_pretrained("Qwen/Qwen-Image", subfolder="transformer")
|
||||
model.save_pretrained("./local/model")
|
||||
```
|
||||
|
||||
For large models, it is helpful to use `max_shard_size` to save a model as multiple shards. Shards can be loaded faster and save memory (refer to the [parallel loading](./loading#parallel-loading) docs for more details), especially if there is more than one GPU.
|
||||
|
||||
```py
|
||||
model.save_pretrained("./local/model", max_shard_size="5GB")
|
||||
```
|
||||
@@ -10,19 +10,22 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
|
||||
specific language governing permissions and limitations under the License.
|
||||
-->
|
||||
|
||||
# Push files to the Hub
|
||||
|
||||
[[open-in-colab]]
|
||||
|
||||
🤗 Diffusers provides a [`~diffusers.utils.PushToHubMixin`] for uploading your model, scheduler, or pipeline to the Hub. It is an easy way to store your files on the Hub, and also allows you to share your work with others. Under the hood, the [`~diffusers.utils.PushToHubMixin`]:
|
||||
# Sharing pipelines and models
|
||||
|
||||
Share your pipeline or models and schedulers on the Hub with the [`~diffusers.utils.PushToHubMixin`] class. This class:
|
||||
|
||||
1. creates a repository on the Hub
|
||||
2. saves your model, scheduler, or pipeline files so they can be reloaded later
|
||||
3. uploads the folder containing these files to the Hub
|
||||
|
||||
This guide will show you how to use the [`~diffusers.utils.PushToHubMixin`] to upload your files to the Hub.
|
||||
This guide will show you how to upload your files to the Hub with the [`~diffusers.utils.PushToHubMixin`] class.
|
||||
|
||||
You'll need to log in to your Hub account with your access [token](https://huggingface.co/settings/tokens) first:
|
||||
Log in to your Hugging Face account with your access [token](https://huggingface.co/settings/tokens).
|
||||
|
||||
<hfoptions id="login">
|
||||
<hfoption id="notebook">
|
||||
|
||||
```py
|
||||
from huggingface_hub import notebook_login
|
||||
@@ -30,9 +33,19 @@ from huggingface_hub import notebook_login
|
||||
notebook_login()
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="hf CLI">
|
||||
|
||||
```bash
|
||||
hf auth login
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
## Models
|
||||
|
||||
To push a model to the Hub, call [`~diffusers.utils.PushToHubMixin.push_to_hub`] and specify the repository id of the model to be stored on the Hub:
|
||||
To push a model to the Hub, call [`~diffusers.utils.PushToHubMixin.push_to_hub`] and specify the repository id of the model.
|
||||
|
||||
```py
|
||||
from diffusers import ControlNetModel
|
||||
@@ -48,15 +61,9 @@ controlnet = ControlNetModel(
|
||||
controlnet.push_to_hub("my-controlnet-model")
|
||||
```
|
||||
|
||||
For models, you can also specify the [*variant*](loading#checkpoint-variants) of the weights to push to the Hub. For example, to push `fp16` weights:
|
||||
The [`~diffusers.utils.PushToHubMixin.push_to_hub`] method saves the model's `config.json` file and the weights are automatically saved as safetensors files.
|
||||
|
||||
```py
|
||||
controlnet.push_to_hub("my-controlnet-model", variant="fp16")
|
||||
```
|
||||
|
||||
The [`~diffusers.utils.PushToHubMixin.push_to_hub`] function saves the model's `config.json` file and the weights are automatically saved in the `safetensors` format.
|
||||
|
||||
Now you can reload the model from your repository on the Hub:
|
||||
Load the model again with [`~ModelMixin.from_pretrained`].
|
||||
|
||||
```py
|
||||
model = ControlNetModel.from_pretrained("your-namespace/my-controlnet-model")
|
||||
@@ -64,7 +71,7 @@ model = ControlNetModel.from_pretrained("your-namespace/my-controlnet-model")
|
||||
|
||||
## Scheduler
|
||||
|
||||
To push a scheduler to the Hub, call [`~diffusers.utils.PushToHubMixin.push_to_hub`] and specify the repository id of the scheduler to be stored on the Hub:
|
||||
To push a scheduler to the Hub, call [`~diffusers.utils.PushToHubMixin.push_to_hub`] and specify the repository id of the scheduler.
|
||||
|
||||
```py
|
||||
from diffusers import DDIMScheduler
|
||||
@@ -81,7 +88,7 @@ scheduler.push_to_hub("my-controlnet-scheduler")
|
||||
|
||||
The [`~diffusers.utils.PushToHubMixin.push_to_hub`] function saves the scheduler's `scheduler_config.json` file to the specified repository.
|
||||
|
||||
Now you can reload the scheduler from your repository on the Hub:
|
||||
Load the scheduler again with [`~SchedulerMixin.from_pretrained`].
|
||||
|
||||
```py
|
||||
scheduler = DDIMScheduler.from_pretrained("your-namespace/my-controlnet-scheduler")
|
||||
@@ -89,7 +96,7 @@ scheduler = DDIMScheduler.from_pretrained("your-namepsace/my-controlnet-schedule
|
||||
|
||||
## Pipeline
|
||||
|
||||
You can also push an entire pipeline with all it's components to the Hub. For example, initialize the components of a [`StableDiffusionPipeline`] with the parameters you want:
|
||||
To push a pipeline to the Hub, initialize the pipeline components with your desired parameters.
|
||||
|
||||
```py
|
||||
from diffusers import (
|
||||
@@ -143,7 +150,7 @@ text_encoder = CLIPTextModel(text_encoder_config)
|
||||
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
|
||||
```
|
||||
|
||||
Pass all of the components to the [`StableDiffusionPipeline`] and call [`~diffusers.utils.PushToHubMixin.push_to_hub`] to push the pipeline to the Hub:
|
||||
Pass all components to the pipeline and call [`~diffusers.utils.PushToHubMixin.push_to_hub`].
|
||||
|
||||
```py
|
||||
components = {
|
||||
@@ -160,7 +167,7 @@ pipeline = StableDiffusionPipeline(**components)
|
||||
pipeline.push_to_hub("my-pipeline")
|
||||
```
|
||||
|
||||
The [`~diffusers.utils.PushToHubMixin.push_to_hub`] function saves each component to a subfolder in the repository. Now you can reload the pipeline from your repository on the Hub:
|
||||
The [`~diffusers.utils.PushToHubMixin.push_to_hub`] method saves each component to a subfolder in the repository. Load the pipeline again with [`~DiffusionPipeline.from_pretrained`].
|
||||
|
||||
```py
|
||||
pipeline = StableDiffusionPipeline.from_pretrained("your-namespace/my-pipeline")
|
||||
@@ -168,10 +175,10 @@ pipeline = StableDiffusionPipeline.from_pretrained("your-namespace/my-pipeline")
|
||||
|
||||
## Privacy
|
||||
|
||||
Set `private=True` in the [`~diffusers.utils.PushToHubMixin.push_to_hub`] function to keep your model, scheduler, or pipeline files private:
|
||||
Set `private=True` in [`~diffusers.utils.PushToHubMixin.push_to_hub`] to keep your model, scheduler, or pipeline files private.
|
||||
|
||||
```py
|
||||
controlnet.push_to_hub("my-controlnet-model-private", private=True)
|
||||
```
|
||||
|
||||
Private repositories are only visible to you, and other users won't be able to clone the repository and your repository won't appear in search results. Even if a user has the URL to your private repository, they'll receive a `404 - Sorry, we can't find the page you are looking for`. You must be [logged in](https://huggingface.co/docs/huggingface_hub/quick-start#login) to load a model from a private repository.
|
||||
Private repositories are only visible to you. Other users won't be able to clone the repository and it won't appear in search results. Even if a user has the URL to your private repository, they'll receive a `404 - Sorry, we can't find the page you are looking for`. You must be [logged in](https://huggingface.co/docs/huggingface_hub/quick-start#login) to load a model from a private repository.
|
||||
@@ -48,10 +48,10 @@ t2i_pipeline = t2i_blocks.init_pipeline(modular_repo_id, components_manager=comp
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
组件仅在调用 [`~ModularPipeline.load_components`] 或 [`~ModularPipeline.load_default_components`] 时加载和注册。以下示例使用 [`~ModularPipeline.load_default_components`] 创建第二个管道,重用第一个管道的所有组件,并将其分配到不同的集合。
|
||||
组件仅在调用 [`~ModularPipeline.load_components`] 时加载和注册。以下示例使用 [`~ModularPipeline.load_components`] 创建第二个管道,重用第一个管道的所有组件,并将其分配到不同的集合。
|
||||
|
||||
```py
|
||||
pipe.load_default_components()
|
||||
pipe.load_components()
|
||||
pipe2 = ModularPipeline.from_pretrained("YiYiXu/modular-demo-auto", components_manager=comp, collection="test2")
|
||||
```
|
||||
|
||||
@@ -185,4 +185,4 @@ comp.enable_auto_cpu_offload(device="cuda")
|
||||
|
||||
所有模型开始时都在 CPU 上,[`ComponentsManager`] 在需要它们之前将它们移动到适当的设备,并在 GPU 内存不足时将其他模型移回 CPU。
|
||||
|
||||
您可以设置自己的规则来决定哪些模型要卸载。
|
||||
您可以设置自己的规则来决定哪些模型要卸载。
|
||||
|
||||
@@ -73,13 +73,13 @@ ComponentSpec(name='guider', type_hint=<class 'diffusers.guiders.perturbed_atten
|
||||
}
|
||||
```
|
||||
|
||||
引导器只有在调用 [`~ModularPipeline.load_default_components`] 之后才会创建,基于 `modular_model_index.json` 中的加载规范。
|
||||
引导器只有在调用 [`~ModularPipeline.load_components`] 之后才会创建,基于 `modular_model_index.json` 中的加载规范。
|
||||
|
||||
```py
|
||||
t2i_pipeline = t2i_blocks.init_pipeline("YiYiXu/modular-doc-guider")
|
||||
# 在初始化时未创建
|
||||
assert t2i_pipeline.guider is None
|
||||
t2i_pipeline.load_default_components()
|
||||
t2i_pipeline.load_components()
|
||||
# 加载为 PAG 引导器
|
||||
t2i_pipeline.guider
|
||||
```
|
||||
@@ -170,4 +170,4 @@ t2i_pipeline.push_to_hub("YiYiXu/modular-doc-guider")
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
</hfoptions>
|
||||
|
||||
@@ -28,7 +28,7 @@ blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS)
|
||||
modular_repo_id = "YiYiXu/modular-loader-t2i-0704"
|
||||
pipeline = blocks.init_pipeline(modular_repo_id)
|
||||
|
||||
pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
pipeline.load_components(torch_dtype=torch.float16)
|
||||
pipeline.to("cuda")
|
||||
|
||||
image = pipeline(prompt="Astronaut in a jungle, cold color palette, muted colors, detailed, 8k", output="images")[0]
|
||||
@@ -48,7 +48,7 @@ blocks = SequentialPipelineBlocks.from_blocks_dict(IMAGE2IMAGE_BLOCKS)
|
||||
modular_repo_id = "YiYiXu/modular-loader-t2i-0704"
|
||||
pipeline = blocks.init_pipeline(modular_repo_id)
|
||||
|
||||
pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
pipeline.load_components(torch_dtype=torch.float16)
|
||||
pipeline.to("cuda")
|
||||
|
||||
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-text2img.png"
|
||||
@@ -72,7 +72,7 @@ blocks = SequentialPipelineBlocks.from_blocks_dict(INPAINT_BLOCKS)
|
||||
modular_repo_id = "YiYiXu/modular-loader-t2i-0704"
|
||||
pipeline = blocks.init_pipeline(modular_repo_id)
|
||||
|
||||
pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
pipeline.load_components(torch_dtype=torch.float16)
|
||||
pipeline.to("cuda")
|
||||
|
||||
img_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-text2img.png"
|
||||
@@ -176,15 +176,15 @@ diffdiff_pipeline = ModularPipeline.from_pretrained(modular_repo_id, trust_remot
|
||||
|
||||
## 加载组件
|
||||
|
||||
一个[`ModularPipeline`]不会自动实例化组件。它只加载配置和组件规范。您可以使用[`~ModularPipeline.load_default_components`]加载所有组件,或仅使用[`~ModularPipeline.load_components`]加载特定组件。
|
||||
一个[`ModularPipeline`]不会自动实例化组件。它只加载配置和组件规范。您可以使用[`~ModularPipeline.load_components`]加载所有组件,也可以向其传入组件名称以仅加载特定组件。
|
||||
|
||||
<hfoptions id="load">
|
||||
<hfoption id="load_default_components">
|
||||
<hfoption id="load_components">
|
||||
|
||||
```py
|
||||
import torch
|
||||
|
||||
t2i_pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
t2i_pipeline.load_components(torch_dtype=torch.float16)
|
||||
t2i_pipeline.to("cuda")
|
||||
```
|
||||
|
||||
|
||||
@@ -175,7 +175,7 @@ print(dd_blocks)
|
||||
使用 [`ModularPipeline.init_pipeline`] 方法将 [`SequentialPipelineBlocks`] 转换为 [`ModularPipeline`]。这会初始化从 `modular_model_index.json` 文件加载的预期组件。通过调用 [`ModularPipeline.load_components`] 显式加载这些组件。
|
||||
|
||||
初始化[`ComponentManager`]时传入pipeline是一个好主意,以帮助管理不同的组件。一旦调用[`~ModularPipeline.load_default_components`],组件就会被注册到[`ComponentManager`]中,并且可以在工作流之间共享。下面的例子使用`collection`参数为组件分配了一个`"diffdiff"`标签,以便更好地组织。
|
||||
初始化[`ComponentsManager`]时传入pipeline是一个好主意,以帮助管理不同的组件。一旦调用[`~ModularPipeline.load_components`],组件就会被注册到[`ComponentsManager`]中,并且可以在工作流之间共享。下面的例子使用`collection`参数为组件分配了一个`"diffdiff"`标签,以便更好地组织。
|
||||
|
||||
```py
|
||||
from diffusers.modular_pipelines import ComponentsManager
|
||||
@@ -209,11 +209,11 @@ ip_adapter_block = StableDiffusionXLAutoIPAdapterStep()
|
||||
dd_blocks.sub_blocks.insert("ip_adapter", ip_adapter_block, 0)
|
||||
```
|
||||
|
||||
调用[`~ModularPipeline.init_pipeline`]来初始化一个[`ModularPipeline`],并使用[`~ModularPipeline.load_default_components`]加载模型组件。加载并设置IP-Adapter以运行pipeline。
|
||||
调用[`~ModularPipeline.init_pipeline`]来初始化一个[`ModularPipeline`],并使用[`~ModularPipeline.load_components`]加载模型组件。加载并设置IP-Adapter以运行pipeline。
|
||||
|
||||
```py
|
||||
dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff")
|
||||
dd_pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
dd_pipeline.load_components(torch_dtype=torch.float16)
|
||||
dd_pipeline.loader.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
|
||||
dd_pipeline.loader.set_ip_adapter_scale(0.6)
|
||||
dd_pipeline = dd_pipeline.to(device)
|
||||
@@ -261,14 +261,14 @@ class SDXLDiffDiffControlNetDenoiseStep(StableDiffusionXLDenoiseLoopWrapper):
|
||||
controlnet_denoise_block = SDXLDiffDiffControlNetDenoiseStep()
|
||||
```
|
||||
|
||||
插入 `controlnet_input` 块并用新的 `controlnet_denoise_block` 替换 `denoise` 块。初始化一个 [`ModularPipeline`] 并将 [`~ModularPipeline.load_default_components`] 加载到其中。
|
||||
插入 `controlnet_input` 块并用新的 `controlnet_denoise_block` 替换 `denoise` 块。初始化一个 [`ModularPipeline`] 并使用 [`~ModularPipeline.load_components`] 加载模型组件。
|
||||
|
||||
```py
|
||||
dd_blocks.sub_blocks.insert("controlnet_input", control_input_block, 7)
|
||||
dd_blocks.sub_blocks["denoise"] = controlnet_denoise_block
|
||||
|
||||
dd_pipeline = dd_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff")
|
||||
dd_pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
dd_pipeline.load_components(torch_dtype=torch.float16)
|
||||
dd_pipeline = dd_pipeline.to(device)
|
||||
|
||||
control_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/diffdiff_tomato_canny.jpeg")
|
||||
@@ -322,7 +322,7 @@ DIFFDIFF_AUTO_BLOCKS.insert("controlnet_input",StableDiffusionXLControlNetAutoIn
|
||||
```py
|
||||
dd_auto_blocks = SequentialPipelineBlocks.from_blocks_dict(DIFFDIFF_AUTO_BLOCKS)
|
||||
dd_pipeline = dd_auto_blocks.init_pipeline("YiYiXu/modular-demo-auto", collection="diffdiff")
|
||||
dd_pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
dd_pipeline.load_components(torch_dtype=torch.float16)
|
||||
```
|
||||
|
||||
## 分享
|
||||
@@ -342,5 +342,5 @@ from diffusers.modular_pipelines import ModularPipeline, ComponentsManager
|
||||
components = ComponentsManager()
|
||||
|
||||
diffdiff_pipeline = ModularPipeline.from_pretrained("YiYiXu/modular-diffdiff-0704", trust_remote_code=True, components_manager=components, collection="diffdiff")
|
||||
diffdiff_pipeline.load_default_components(torch_dtype=torch.float16)
|
||||
diffdiff_pipeline.load_components(torch_dtype=torch.float16)
|
||||
```
|
||||
|
||||
@@ -223,7 +223,7 @@ from diffusers.image_processor import VaeImageProcessor
|
||||
import torch
|
||||
|
||||
vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=torch.bfloat16).to("cuda")
|
||||
vae_scale_factor = 2 ** (len(vae.config.block_out_channels))
|
||||
vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)
|
||||
image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)
|
||||
|
||||
with torch.no_grad():
|
||||
|
||||
@@ -1399,6 +1399,7 @@ def main(args):
|
||||
torch_dtype = torch.float16
|
||||
elif args.prior_generation_precision == "bf16":
|
||||
torch_dtype = torch.bfloat16
|
||||
|
||||
pipeline = FluxPipeline.from_pretrained(
|
||||
args.pretrained_model_name_or_path,
|
||||
torch_dtype=torch_dtype,
|
||||
@@ -1419,7 +1420,8 @@ def main(args):
|
||||
for example in tqdm(
|
||||
sample_dataloader, desc="Generating class images", disable=not accelerator.is_local_main_process
|
||||
):
|
||||
images = pipeline(example["prompt"]).images
|
||||
with torch.autocast(device_type=accelerator.device.type, dtype=torch_dtype):
|
||||
images = pipeline(prompt=example["prompt"]).images
|
||||
|
||||
for i, image in enumerate(images):
|
||||
hash_image = insecure_hashlib.sha1(image.tobytes()).hexdigest()
|
||||
|
||||
@@ -25,6 +25,11 @@ from os.path import abspath, dirname, join
|
||||
git_repo_path = abspath(join(dirname(dirname(dirname(__file__))), "src"))
|
||||
sys.path.insert(1, git_repo_path)
|
||||
|
||||
# Add parent directory to path so we can import from tests
|
||||
repo_root = abspath(dirname(dirname(__file__)))
|
||||
if repo_root not in sys.path:
|
||||
sys.path.insert(0, repo_root)
|
||||
|
||||
|
||||
# silence FutureWarning warnings in tests since often we can't act on them until
|
||||
# they become normal warnings - i.e. the tests still need to test the current functionality
|
||||
@@ -32,13 +37,13 @@ warnings.simplefilter(action="ignore", category=FutureWarning)
|
||||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
from diffusers.utils.testing_utils import pytest_addoption_shared
|
||||
from tests.testing_utils import pytest_addoption_shared
|
||||
|
||||
pytest_addoption_shared(parser)
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter):
|
||||
from diffusers.utils.testing_utils import pytest_terminal_summary_main
|
||||
from tests.testing_utils import pytest_terminal_summary_main
|
||||
|
||||
make_reports = terminalreporter.config.getoption("--make-reports")
|
||||
if make_reports:
|
||||
|
||||
@@ -24,6 +24,8 @@ import math
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
|
||||
# Add repo root to path to import from tests
|
||||
from pathlib import Path
|
||||
|
||||
import accelerate
|
||||
@@ -54,8 +56,7 @@ from diffusers.optimization import get_scheduler
|
||||
from diffusers.training_utils import compute_density_for_timestep_sampling, compute_loss_weighting_for_sd3, free_memory
|
||||
from diffusers.utils import check_min_version, is_wandb_available, make_image_grid
|
||||
from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
|
||||
from diffusers.utils.testing_utils import backend_empty_cache
|
||||
from diffusers.utils.torch_utils import is_compiled_module
|
||||
from diffusers.utils.torch_utils import backend_empty_cache, is_compiled_module
|
||||
|
||||
|
||||
if is_wandb_available():
|
||||
|
||||
@@ -1131,6 +1131,7 @@ def main(args):
|
||||
torch_dtype = torch.float16
|
||||
elif args.prior_generation_precision == "bf16":
|
||||
torch_dtype = torch.bfloat16
|
||||
|
||||
pipeline = FluxPipeline.from_pretrained(
|
||||
args.pretrained_model_name_or_path,
|
||||
torch_dtype=torch_dtype,
|
||||
@@ -1151,7 +1152,8 @@ def main(args):
|
||||
for example in tqdm(
|
||||
sample_dataloader, desc="Generating class images", disable=not accelerator.is_local_main_process
|
||||
):
|
||||
images = pipeline(example["prompt"]).images
|
||||
with torch.autocast(device_type=accelerator.device.type, dtype=torch_dtype):
|
||||
images = pipeline(prompt=example["prompt"]).images
|
||||
|
||||
for i, image in enumerate(images):
|
||||
hash_image = insecure_hashlib.sha1(image.tobytes()).hexdigest()
|
||||
@@ -1159,8 +1161,7 @@ def main(args):
|
||||
image.save(image_filename)
|
||||
|
||||
del pipeline
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
free_memory()
|
||||
|
||||
# Handle the repository creation
|
||||
if accelerator.is_main_process:
|
||||
@@ -1728,6 +1729,10 @@ def main(args):
|
||||
device=accelerator.device,
|
||||
prompt=args.instance_prompt,
|
||||
)
|
||||
else:
|
||||
prompt_embeds, pooled_prompt_embeds, text_ids = compute_text_embeddings(
|
||||
prompts, text_encoders, tokenizers
|
||||
)
|
||||
|
||||
# Convert images to latent space
|
||||
if args.cache_latents:
|
||||
|
||||
@@ -1270,6 +1270,7 @@ def main(args):
|
||||
subfolder="transformer",
|
||||
revision=args.revision,
|
||||
variant=args.variant,
|
||||
torch_dtype=torch_dtype,
|
||||
)
|
||||
pipeline = FluxKontextPipeline.from_pretrained(
|
||||
args.pretrained_model_name_or_path,
|
||||
@@ -1292,7 +1293,8 @@ def main(args):
|
||||
for example in tqdm(
|
||||
sample_dataloader, desc="Generating class images", disable=not accelerator.is_local_main_process
|
||||
):
|
||||
images = pipeline(example["prompt"]).images
|
||||
with torch.autocast(device_type=accelerator.device.type, dtype=torch_dtype):
|
||||
images = pipeline(prompt=example["prompt"]).images
|
||||
|
||||
for i, image in enumerate(images):
|
||||
hash_image = insecure_hashlib.sha1(image.tobytes()).hexdigest()
|
||||
@@ -1899,6 +1901,10 @@ def main(args):
|
||||
device=accelerator.device,
|
||||
prompt=args.instance_prompt,
|
||||
)
|
||||
else:
|
||||
prompt_embeds, pooled_prompt_embeds, text_ids = compute_text_embeddings(
|
||||
prompts, text_encoders, tokenizers
|
||||
)
|
||||
|
||||
# Convert images to latent space
|
||||
if args.cache_latents:
|
||||
|
||||
@@ -1760,7 +1760,7 @@
|
||||
"clip_local = None\n",
|
||||
"clip_pos = None\n",
|
||||
"\n",
|
||||
"# constands for data handling\n",
|
||||
"# constants for data handling\n",
|
||||
"save_traj = False\n",
|
||||
"save_data = False\n",
|
||||
"output_dir = \"/content/\""
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
Please note that this project is not actively maintained. However, you can open an issue and tag @gzguevara.
|
||||
|
||||
[DreamBooth](https://huggingface.co/papers/2208.12242) is a method to personalize text2image models like stable diffusion given just a few(3~5) images of a subject. This project consists of **two parts**. Training Stable Diffusion for inpainting requieres prompt-image-mask pairs. The Unet of inpainiting models have 5 additional input channels (4 for the encoded masked-image and 1 for the mask itself).
|
||||
[DreamBooth](https://huggingface.co/papers/2208.12242) is a method to personalize text2image models like stable diffusion given just a few (3~5) images of a subject. This project consists of **two parts**. Training Stable Diffusion for inpainting requires prompt-image-mask pairs. The UNet of inpainting models has 5 additional input channels (4 for the encoded masked-image and 1 for the mask itself).
|
||||
|
||||
**The first part**, the `multi_inpaint_dataset.ipynb` notebook, demonstrates how to make a 🤗 dataset of prompt-image-mask pairs. You can, however, skip the first part and move straight to the second part with the example datasets in this project. ([cat toy dataset masked](https://huggingface.co/datasets/gzguevara/cat_toy_masked), [mr. potato head dataset masked](https://huggingface.co/datasets/gzguevara/mr_potato_head_masked))
|
||||
|
||||
|
||||
@@ -24,12 +24,18 @@ import tempfile
|
||||
import torch
|
||||
|
||||
from diffusers import VQModel
|
||||
from diffusers.utils.testing_utils import require_timm
|
||||
|
||||
|
||||
# Add parent directories to path to import from tests
|
||||
sys.path.append("..")
|
||||
repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
|
||||
if repo_root not in sys.path:
|
||||
sys.path.insert(0, repo_root)
|
||||
|
||||
from test_examples_utils import ExamplesTestsAccelerate, run_command # noqa: E402
|
||||
|
||||
from tests.testing_utils import require_timm # noqa
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
|
||||
@@ -132,6 +132,7 @@ _deps = [
|
||||
"gguf>=0.10.0",
|
||||
"torchao>=0.7.0",
|
||||
"bitsandbytes>=0.43.3",
|
||||
"nvidia_modelopt[hf]>=0.33.1",
|
||||
"regex!=2019.12.17",
|
||||
"requests",
|
||||
"tensorboard",
|
||||
@@ -244,6 +245,7 @@ extras["bitsandbytes"] = deps_list("bitsandbytes", "accelerate")
|
||||
extras["gguf"] = deps_list("gguf", "accelerate")
|
||||
extras["optimum_quanto"] = deps_list("optimum_quanto", "accelerate")
|
||||
extras["torchao"] = deps_list("torchao", "accelerate")
|
||||
extras["nvidia_modelopt"] = deps_list("nvidia_modelopt[hf]")
|
||||
|
||||
if os.name == "nt": # windows
|
||||
extras["flax"] = [] # jax is not supported on windows
|
||||
|
||||
@@ -13,6 +13,7 @@ from .utils import (
|
||||
is_k_diffusion_available,
|
||||
is_librosa_available,
|
||||
is_note_seq_available,
|
||||
is_nvidia_modelopt_available,
|
||||
is_onnx_available,
|
||||
is_opencv_available,
|
||||
is_optimum_quanto_available,
|
||||
@@ -111,6 +112,18 @@ except OptionalDependencyNotAvailable:
|
||||
else:
|
||||
_import_structure["quantizers.quantization_config"].append("QuantoConfig")
|
||||
|
||||
try:
|
||||
if not is_torch_available() and not is_accelerate_available() and not is_nvidia_modelopt_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
from .utils import dummy_nvidia_modelopt_objects
|
||||
|
||||
_import_structure["utils.dummy_nvidia_modelopt_objects"] = [
|
||||
name for name in dir(dummy_nvidia_modelopt_objects) if not name.startswith("_")
|
||||
]
|
||||
else:
|
||||
_import_structure["quantizers.quantization_config"].append("NVIDIAModelOptConfig")
|
||||
|
||||
try:
|
||||
if not is_onnx_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
@@ -494,6 +507,7 @@ else:
|
||||
"PixArtSigmaPAGPipeline",
|
||||
"PixArtSigmaPipeline",
|
||||
"QwenImageControlNetPipeline",
|
||||
"QwenImageEditInpaintPipeline",
|
||||
"QwenImageEditPipeline",
|
||||
"QwenImageImg2ImgPipeline",
|
||||
"QwenImageInpaintPipeline",
|
||||
@@ -794,6 +808,14 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
else:
|
||||
from .quantizers.quantization_config import QuantoConfig
|
||||
|
||||
try:
|
||||
if not is_nvidia_modelopt_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
except OptionalDependencyNotAvailable:
|
||||
from .utils.dummy_nvidia_modelopt_objects import *
|
||||
else:
|
||||
from .quantizers.quantization_config import NVIDIAModelOptConfig
|
||||
|
||||
try:
|
||||
if not is_onnx_available():
|
||||
raise OptionalDependencyNotAvailable()
|
||||
@@ -1134,6 +1156,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
PixArtSigmaPAGPipeline,
|
||||
PixArtSigmaPipeline,
|
||||
QwenImageControlNetPipeline,
|
||||
QwenImageEditInpaintPipeline,
|
||||
QwenImageEditPipeline,
|
||||
QwenImageImg2ImgPipeline,
|
||||
QwenImageInpaintPipeline,
|
||||
|
||||
@@ -39,6 +39,7 @@ deps = {
|
||||
"gguf": "gguf>=0.10.0",
|
||||
"torchao": "torchao>=0.7.0",
|
||||
"bitsandbytes": "bitsandbytes>=0.43.3",
|
||||
"nvidia_modelopt[hf]": "nvidia_modelopt[hf]>=0.33.1",
|
||||
"regex": "regex!=2019.12.17",
|
||||
"requests": "requests",
|
||||
"tensorboard": "tensorboard",
|
||||
|
||||
@@ -82,15 +82,15 @@ class AutoGuidance(BaseGuidance):
|
||||
self.guidance_rescale = guidance_rescale
|
||||
self.use_original_formulation = use_original_formulation
|
||||
|
||||
if auto_guidance_layers is None and auto_guidance_config is None:
|
||||
is_layer_or_config_provided = auto_guidance_layers is not None or auto_guidance_config is not None
|
||||
is_layer_and_config_provided = auto_guidance_layers is not None and auto_guidance_config is not None
|
||||
if not is_layer_or_config_provided:
|
||||
raise ValueError(
|
||||
"Either `auto_guidance_layers` or `auto_guidance_config` must be provided to enable Skip Layer Guidance."
|
||||
"Either `auto_guidance_layers` or `auto_guidance_config` must be provided to enable AutoGuidance."
|
||||
)
|
||||
if auto_guidance_layers is not None and auto_guidance_config is not None:
|
||||
if is_layer_and_config_provided:
|
||||
raise ValueError("Only one of `auto_guidance_layers` or `auto_guidance_config` can be provided.")
|
||||
if (dropout is None and auto_guidance_layers is not None) or (
|
||||
dropout is not None and auto_guidance_layers is None
|
||||
):
|
||||
if auto_guidance_config is None and dropout is None:
|
||||
raise ValueError("`dropout` must be provided if `auto_guidance_layers` is provided.")
|
||||
|
||||
if auto_guidance_layers is not None:
|
||||
|
||||
@@ -61,7 +61,7 @@ def project(v0: torch.Tensor, v1: torch.Tensor, upcast_to_double: bool = True) -
|
||||
def build_image_from_pyramid(pyramid: List[torch.Tensor]) -> torch.Tensor:
|
||||
"""
|
||||
Recovers the data space latents from the Laplacian pyramid frequency space. Implementation from the paper
|
||||
(Algorihtm 2).
|
||||
(Algorithm 2).
|
||||
"""
|
||||
# pyramid shapes: [[B, C, H, W], [B, C, H/2, W/2], ...]
|
||||
img = pyramid[-1]
|
||||
|
||||
@@ -54,11 +54,11 @@ class FasterCacheConfig:
|
||||
Attributes:
|
||||
spatial_attention_block_skip_range (`int`, defaults to `2`):
|
||||
Calculate the attention states every `N` iterations. If this is set to `N`, the attention computation will
|
||||
be skipped `N - 1` times (i.e., cached attention states will be re-used) before computing the new attention
|
||||
be skipped `N - 1` times (i.e., cached attention states will be reused) before computing the new attention
|
||||
states again.
|
||||
temporal_attention_block_skip_range (`int`, *optional*, defaults to `None`):
|
||||
Calculate the attention states every `N` iterations. If this is set to `N`, the attention computation will
|
||||
be skipped `N - 1` times (i.e., cached attention states will be re-used) before computing the new attention
|
||||
be skipped `N - 1` times (i.e., cached attention states will be reused) before computing the new attention
|
||||
states again.
|
||||
spatial_attention_timestep_skip_range (`Tuple[float, float]`, defaults to `(-1, 681)`):
|
||||
The timestep range within which the spatial attention computation can be skipped without a significant loss
|
||||
@@ -90,7 +90,7 @@ class FasterCacheConfig:
|
||||
from the conditional branch outputs.
|
||||
unconditional_batch_skip_range (`int`, defaults to `5`):
|
||||
Process the unconditional branch every `N` iterations. If this is set to `N`, the unconditional branch
|
||||
computation will be skipped `N - 1` times (i.e., cached unconditional branch states will be re-used) before
|
||||
computation will be skipped `N - 1` times (i.e., cached unconditional branch states will be reused) before
|
||||
computing the new unconditional branch states again.
|
||||
unconditional_batch_timestep_skip_range (`Tuple[float, float]`, defaults to `(-1, 641)`):
|
||||
The timestep range within which the unconditional branch computation can be skipped without a significant
|
||||
|
||||
@@ -45,15 +45,15 @@ class PyramidAttentionBroadcastConfig:
|
||||
spatial_attention_block_skip_range (`int`, *optional*, defaults to `None`):
|
||||
The number of times a specific spatial attention broadcast is skipped before computing the attention states
|
||||
to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e.,
|
||||
old attention states will be re-used) before computing the new attention states again.
|
||||
old attention states will be reused) before computing the new attention states again.
|
||||
temporal_attention_block_skip_range (`int`, *optional*, defaults to `None`):
|
||||
The number of times a specific temporal attention broadcast is skipped before computing the attention
|
||||
states to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times
|
||||
(i.e., old attention states will be re-used) before computing the new attention states again.
|
||||
(i.e., old attention states will be reused) before computing the new attention states again.
|
||||
cross_attention_block_skip_range (`int`, *optional*, defaults to `None`):
|
||||
The number of times a specific cross-attention broadcast is skipped before computing the attention states
|
||||
to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e.,
|
||||
old attention states will be re-used) before computing the new attention states again.
|
||||
old attention states will be reused) before computing the new attention states again.
|
||||
spatial_attention_timestep_skip_range (`Tuple[int, int]`, defaults to `(100, 800)`):
|
||||
The range of timesteps to skip in the spatial attention layer. The attention computations will be
|
||||
conditionally skipped if the current timestep is within the specified range.
|
||||
@@ -305,7 +305,7 @@ def _apply_pyramid_attention_broadcast_hook(
|
||||
block_skip_range (`int`):
|
||||
The number of times a specific attention broadcast is skipped before computing the attention states to
|
||||
re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e., old
|
||||
attention states will be re-used) before computing the new attention states again.
|
||||
attention states will be reused) before computing the new attention states again.
|
||||
current_timestep_callback (`Callable[[], int]`):
|
||||
A callback function that returns the current inference timestep.
|
||||
"""
|
||||
|
||||
@@ -2129,6 +2129,10 @@ def _convert_non_diffusers_ltxv_lora_to_diffusers(state_dict, non_diffusers_pref
|
||||
|
||||
|
||||
def _convert_non_diffusers_qwen_lora_to_diffusers(state_dict):
|
||||
has_diffusion_model = any(k.startswith("diffusion_model.") for k in state_dict)
|
||||
if has_diffusion_model:
|
||||
state_dict = {k.removeprefix("diffusion_model."): v for k, v in state_dict.items()}
|
||||
|
||||
has_lora_unet = any(k.startswith("lora_unet_") for k in state_dict)
|
||||
if has_lora_unet:
|
||||
state_dict = {k.removeprefix("lora_unet_"): v for k, v in state_dict.items()}
|
||||
@@ -2201,29 +2205,44 @@ def _convert_non_diffusers_qwen_lora_to_diffusers(state_dict):
|
||||
all_keys = list(state_dict.keys())
|
||||
down_key = ".lora_down.weight"
|
||||
up_key = ".lora_up.weight"
|
||||
a_key = ".lora_A.weight"
|
||||
b_key = ".lora_B.weight"
|
||||
|
||||
def get_alpha_scales(down_weight, alpha_key):
|
||||
rank = down_weight.shape[0]
|
||||
alpha = state_dict.pop(alpha_key).item()
|
||||
scale = alpha / rank # LoRA is scaled by 'alpha / rank' in forward pass, so we need to scale it back here
|
||||
scale_down = scale
|
||||
scale_up = 1.0
|
||||
while scale_down * 2 < scale_up:
|
||||
scale_down *= 2
|
||||
scale_up /= 2
|
||||
return scale_down, scale_up
|
||||
has_non_diffusers_lora_id = any(down_key in k or up_key in k for k in all_keys)
|
||||
has_diffusers_lora_id = any(a_key in k or b_key in k for k in all_keys)
|
||||
|
||||
for k in all_keys:
|
||||
if k.endswith(down_key):
|
||||
diffusers_down_key = k.replace(down_key, ".lora_A.weight")
|
||||
diffusers_up_key = k.replace(down_key, up_key).replace(up_key, ".lora_B.weight")
|
||||
alpha_key = k.replace(down_key, ".alpha")
|
||||
if has_non_diffusers_lora_id:
|
||||
|
||||
down_weight = state_dict.pop(k)
|
||||
up_weight = state_dict.pop(k.replace(down_key, up_key))
|
||||
scale_down, scale_up = get_alpha_scales(down_weight, alpha_key)
|
||||
converted_state_dict[diffusers_down_key] = down_weight * scale_down
|
||||
converted_state_dict[diffusers_up_key] = up_weight * scale_up
|
||||
def get_alpha_scales(down_weight, alpha_key):
|
||||
rank = down_weight.shape[0]
|
||||
alpha = state_dict.pop(alpha_key).item()
|
||||
scale = alpha / rank # LoRA is scaled by 'alpha / rank' in forward pass, so we need to scale it back here
|
||||
scale_down = scale
|
||||
scale_up = 1.0
|
||||
while scale_down * 2 < scale_up:
|
||||
scale_down *= 2
|
||||
scale_up /= 2
|
||||
return scale_down, scale_up
|
||||
|
||||
for k in all_keys:
|
||||
if k.endswith(down_key):
|
||||
diffusers_down_key = k.replace(down_key, ".lora_A.weight")
|
||||
diffusers_up_key = k.replace(down_key, up_key).replace(up_key, ".lora_B.weight")
|
||||
alpha_key = k.replace(down_key, ".alpha")
|
||||
|
||||
down_weight = state_dict.pop(k)
|
||||
up_weight = state_dict.pop(k.replace(down_key, up_key))
|
||||
scale_down, scale_up = get_alpha_scales(down_weight, alpha_key)
|
||||
converted_state_dict[diffusers_down_key] = down_weight * scale_down
|
||||
converted_state_dict[diffusers_up_key] = up_weight * scale_up
|
||||
|
||||
# Already in diffusers format (lora_A/lora_B), just pop
|
||||
elif has_diffusers_lora_id:
|
||||
for k in all_keys:
|
||||
if a_key in k or b_key in k:
|
||||
converted_state_dict[k] = state_dict.pop(k)
|
||||
elif ".alpha" in k:
|
||||
state_dict.pop(k)
|
||||
|
||||
if len(state_dict) > 0:
|
||||
raise ValueError(f"`state_dict` should be empty at this point but has {state_dict.keys()=}")
|
||||
|
||||
@@ -6684,7 +6684,8 @@ class QwenImageLoraLoaderMixin(LoraBaseMixin):
|
||||
|
||||
has_alphas_in_sd = any(k.endswith(".alpha") for k in state_dict)
|
||||
has_lora_unet = any(k.startswith("lora_unet_") for k in state_dict)
|
||||
if has_alphas_in_sd or has_lora_unet:
|
||||
has_diffusion_model = any(k.startswith("diffusion_model.") for k in state_dict)
|
||||
if has_alphas_in_sd or has_lora_unet or has_diffusion_model:
|
||||
state_dict = _convert_non_diffusers_qwen_lora_to_diffusers(state_dict)
|
||||
|
||||
out = (state_dict, metadata) if return_lora_metadata else state_dict
|
||||
|
||||
@@ -26,6 +26,7 @@ from ..utils import (
|
||||
is_flash_attn_3_available,
|
||||
is_flash_attn_available,
|
||||
is_flash_attn_version,
|
||||
is_kernels_available,
|
||||
is_sageattention_available,
|
||||
is_sageattention_version,
|
||||
is_torch_npu_available,
|
||||
@@ -35,7 +36,7 @@ from ..utils import (
|
||||
is_xformers_available,
|
||||
is_xformers_version,
|
||||
)
|
||||
from ..utils.constants import DIFFUSERS_ATTN_BACKEND, DIFFUSERS_ATTN_CHECKS
|
||||
from ..utils.constants import DIFFUSERS_ATTN_BACKEND, DIFFUSERS_ATTN_CHECKS, DIFFUSERS_ENABLE_HUB_KERNELS
|
||||
|
||||
|
||||
_REQUIRED_FLASH_VERSION = "2.6.3"
|
||||
@@ -67,6 +68,17 @@ else:
|
||||
flash_attn_3_func = None
|
||||
flash_attn_3_varlen_func = None
|
||||
|
||||
if DIFFUSERS_ENABLE_HUB_KERNELS:
|
||||
if not is_kernels_available():
|
||||
raise ImportError(
|
||||
"To use FA3 kernel for your hardware from the Hub, the `kernels` library must be installed. Install with `pip install kernels`."
|
||||
)
|
||||
from ..utils.kernels_utils import _get_fa3_from_hub
|
||||
|
||||
flash_attn_interface_hub = _get_fa3_from_hub()
|
||||
flash_attn_3_func_hub = flash_attn_interface_hub.flash_attn_func
|
||||
else:
|
||||
flash_attn_3_func_hub = None
|
||||
|
||||
if _CAN_USE_SAGE_ATTN:
|
||||
from sageattention import (
|
||||
@@ -153,6 +165,8 @@ class AttentionBackendName(str, Enum):
|
||||
FLASH_VARLEN = "flash_varlen"
|
||||
_FLASH_3 = "_flash_3"
|
||||
_FLASH_VARLEN_3 = "_flash_varlen_3"
|
||||
_FLASH_3_HUB = "_flash_3_hub"
|
||||
# _FLASH_VARLEN_3_HUB = "_flash_varlen_3_hub" # not supported yet.
|
||||
|
||||
# PyTorch native
|
||||
FLEX = "flex"
|
||||
@@ -351,6 +365,17 @@ def _check_attention_backend_requirements(backend: AttentionBackendName) -> None
|
||||
f"Flash Attention 3 backend '{backend.value}' is not usable because of missing package or the version is too old. Please build FA3 beta release from source."
|
||||
)
|
||||
|
||||
# TODO: add support for the Hub variant of FA3 varlen later
|
||||
elif backend in [AttentionBackendName._FLASH_3_HUB]:
|
||||
if not DIFFUSERS_ENABLE_HUB_KERNELS:
|
||||
raise RuntimeError(
|
||||
f"Flash Attention 3 Hub backend '{backend.value}' is not usable because the `DIFFUSERS_ENABLE_HUB_KERNELS` env var isn't set. Please set it like `export DIFFUSERS_ENABLE_HUB_KERNELS=yes`."
|
||||
)
|
||||
if not is_kernels_available():
|
||||
raise RuntimeError(
|
||||
f"Flash Attention 3 Hub backend '{backend.value}' is not usable because the `kernels` package isn't available. Please install it with `pip install kernels`."
|
||||
)
|
||||
|
||||
elif backend in [
|
||||
AttentionBackendName.SAGE,
|
||||
AttentionBackendName.SAGE_VARLEN,
|
||||
@@ -657,6 +682,44 @@ def _flash_attention_3(
|
||||
return (out, lse) if return_attn_probs else out
|
||||
|
||||
|
||||
@_AttentionBackendRegistry.register(
|
||||
AttentionBackendName._FLASH_3_HUB,
|
||||
constraints=[_check_device, _check_qkv_dtype_bf16_or_fp16, _check_shape],
|
||||
)
|
||||
def _flash_attention_3_hub(
|
||||
query: torch.Tensor,
|
||||
key: torch.Tensor,
|
||||
value: torch.Tensor,
|
||||
scale: Optional[float] = None,
|
||||
is_causal: bool = False,
|
||||
window_size: Tuple[int, int] = (-1, -1),
|
||||
softcap: float = 0.0,
|
||||
deterministic: bool = False,
|
||||
return_attn_probs: bool = False,
|
||||
) -> torch.Tensor:
|
||||
out = flash_attn_3_func_hub(
|
||||
q=query,
|
||||
k=key,
|
||||
v=value,
|
||||
softmax_scale=scale,
|
||||
causal=is_causal,
|
||||
qv=None,
|
||||
q_descale=None,
|
||||
k_descale=None,
|
||||
v_descale=None,
|
||||
window_size=window_size,
|
||||
softcap=softcap,
|
||||
num_splits=1,
|
||||
pack_gqa=None,
|
||||
deterministic=deterministic,
|
||||
sm_margin=0,
|
||||
return_attn_probs=return_attn_probs,
|
||||
)
|
||||
# When `return_attn_probs` is True, the above returns a tuple of
|
||||
# actual outputs and lse.
|
||||
return (out[0], out[1]) if return_attn_probs else out
|
||||
|
||||
|
||||
@_AttentionBackendRegistry.register(
|
||||
AttentionBackendName._FLASH_VARLEN_3,
|
||||
constraints=[_check_device, _check_qkv_dtype_bf16_or_fp16, _check_shape],
|
||||
@@ -955,12 +1018,13 @@ def _native_npu_attention(
|
||||
dropout_p: float = 0.0,
|
||||
scale: Optional[float] = None,
|
||||
) -> torch.Tensor:
|
||||
return npu_fusion_attention(
|
||||
query, key, value = (x.transpose(1, 2).contiguous() for x in (query, key, value))
|
||||
out = npu_fusion_attention(
|
||||
query,
|
||||
key,
|
||||
value,
|
||||
query.size(2), # num_heads
|
||||
input_layout="BSND",
|
||||
query.size(1), # num_heads
|
||||
input_layout="BNSD",
|
||||
pse=None,
|
||||
scale=1.0 / math.sqrt(query.shape[-1]) if scale is None else scale,
|
||||
pre_tockens=65536,
|
||||
@@ -969,6 +1033,8 @@ def _native_npu_attention(
|
||||
sync=False,
|
||||
inner_precise=0,
|
||||
)[0]
|
||||
out = out.transpose(1, 2).contiguous()
|
||||
return out
|
||||
|
||||
|
||||
# Reference: https://github.com/pytorch/xla/blob/06c5533de6588f6b90aa1655d9850bcf733b90b4/torch_xla/experimental/custom_kernel.py#L853
|
||||
|
||||
@@ -21,7 +21,7 @@ import torch.nn as nn
|
||||
from ...configuration_utils import ConfigMixin, register_to_config
|
||||
from ...loaders import FromOriginalModelMixin, PeftAdapterMixin
|
||||
from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
|
||||
from ..attention import FeedForward
|
||||
from ..attention import AttentionMixin, FeedForward
|
||||
from ..cache_utils import CacheMixin
|
||||
from ..modeling_outputs import Transformer2DModelOutput
|
||||
from ..modeling_utils import ModelMixin
|
||||
@@ -134,7 +134,9 @@ class WanVACETransformerBlock(nn.Module):
|
||||
return conditioning_states, control_hidden_states
|
||||
|
||||
|
||||
class WanVACETransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin):
|
||||
class WanVACETransformer3DModel(
|
||||
ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin, AttentionMixin
|
||||
):
|
||||
r"""
|
||||
A Transformer model for video-like data used in the Wan model.
|
||||
|
||||
|
||||
@@ -220,7 +220,7 @@ class FluxDenoiseStep(FluxDenoiseLoopWrapper):
|
||||
return (
|
||||
"Denoise step that iteratively denoise the latents. \n"
|
||||
"Its loop logic is defined in `FluxDenoiseLoopWrapper.__call__` method \n"
|
||||
"At each iteration, it runs blocks defined in `sub_blocks` sequencially:\n"
|
||||
"At each iteration, it runs blocks defined in `sub_blocks` sequentially:\n"
|
||||
" - `FluxLoopDenoiser`\n"
|
||||
" - `FluxLoopAfterDenoiser`\n"
|
||||
"This block supports both text2image and img2img tasks."
|
||||
|
||||
@@ -229,7 +229,7 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin):
|
||||
Base class for all Pipeline Blocks: PipelineBlock, AutoPipelineBlocks, SequentialPipelineBlocks,
|
||||
LoopSequentialPipelineBlocks
|
||||
|
||||
[`ModularPipelineBlocks`] provides method to load and save the defination of pipeline blocks.
|
||||
[`ModularPipelineBlocks`] provides method to load and save the definition of pipeline blocks.
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
@@ -299,7 +299,7 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin):
|
||||
def from_pretrained(
|
||||
cls,
|
||||
pretrained_model_name_or_path: str,
|
||||
trust_remote_code: Optional[bool] = None,
|
||||
trust_remote_code: bool = False,
|
||||
**kwargs,
|
||||
):
|
||||
hub_kwargs_names = [
|
||||
@@ -1418,7 +1418,7 @@ class LoopSequentialPipelineBlocks(ModularPipelineBlocks):
|
||||
# YiYi TODO:
|
||||
# 1. look into the serialization of modular_model_index.json, make sure the items are properly ordered like model_index.json (currently a mess)
|
||||
# 2. do we need ConfigSpec? they are basically just key/val kwargs
|
||||
# 3. imnprove docstring and potentially add validator for methods where we accpet kwargs to be passed to from_pretrained/save_pretrained/load_default_components(), load_components()
|
||||
# 3. improve docstring and potentially add validator for methods where we accept kwargs to be passed to from_pretrained/save_pretrained/load_components()
|
||||
class ModularPipeline(ConfigMixin, PushToHubMixin):
|
||||
"""
|
||||
Base class for all Modular pipelines.
|
||||
@@ -1488,7 +1488,7 @@ class ModularPipeline(ConfigMixin, PushToHubMixin):
|
||||
- Components with default_creation_method="from_config" are created immediately, its specs are not included
|
||||
in config dict and will not be saved in `modular_model_index.json`
|
||||
- Components with default_creation_method="from_pretrained" are set to None and can be loaded later with
|
||||
`load_default_components()`/`load_components()`
|
||||
`load_components()` (with or without specific component names)
|
||||
- The pipeline's config dict is populated with component specs (only for from_pretrained components) and
|
||||
config values, which will be saved as `modular_model_index.json` during `save_pretrained`
|
||||
- The pipeline's config dict is also used to store the pipeline blocks's class name, which will be saved as
|
||||
@@ -1603,20 +1603,6 @@ class ModularPipeline(ConfigMixin, PushToHubMixin):
|
||||
params[input_param.name] = input_param.default
|
||||
return params
|
||||
|
||||
def load_default_components(self, **kwargs):
|
||||
"""
|
||||
Load from_pretrained components using the loading specs in the config dict.
|
||||
|
||||
Args:
|
||||
**kwargs: Additional arguments passed to `from_pretrained` method, e.g. torch_dtype, cache_dir, etc.
|
||||
"""
|
||||
names = [
|
||||
name
|
||||
for name in self._component_specs.keys()
|
||||
if self._component_specs[name].default_creation_method == "from_pretrained"
|
||||
]
|
||||
self.load_components(names=names, **kwargs)
|
||||
|
||||
@classmethod
|
||||
@validate_hf_hub_args
|
||||
def from_pretrained(
|
||||
@@ -1770,8 +1756,8 @@ class ModularPipeline(ConfigMixin, PushToHubMixin):
|
||||
- non from_pretrained components are created during __init__ and registered as the object itself
|
||||
- Components are updated with the `update_components()` method: e.g. loader.update_components(unet=unet) or
|
||||
loader.update_components(guider=guider_spec)
|
||||
- (from_pretrained) Components are loaded with the `load_default_components()` method: e.g.
|
||||
loader.load_default_components(names=["unet"])
|
||||
- (from_pretrained) Components are loaded with the `load_components()` method: e.g.
|
||||
loader.load_components(names=["unet"]) or loader.load_components() to load all default components
|
||||
|
||||
Args:
|
||||
**kwargs: Keyword arguments where keys are component names and values are component objects.
|
||||
@@ -2097,13 +2083,14 @@ class ModularPipeline(ConfigMixin, PushToHubMixin):
|
||||
self.register_to_config(**config_to_register)
|
||||
|
||||
# YiYi TODO: support map for additional from_pretrained kwargs
|
||||
# YiYi/Dhruv TODO: consolidate load_components and load_default_components?
|
||||
def load_components(self, names: Union[List[str], str], **kwargs):
|
||||
def load_components(self, names: Optional[Union[List[str], str]] = None, **kwargs):
|
||||
"""
|
||||
Load selected components from specs.
|
||||
|
||||
Args:
|
||||
names: List of component names to load; by default will not load any components
|
||||
names: List of component names to load. If None, will load all components with
|
||||
default_creation_method == "from_pretrained". If provided as a list or string, will load only the
|
||||
specified components.
|
||||
**kwargs: additional kwargs to be passed to `from_pretrained()`. Can be:
|
||||
- a single value to be applied to all components to be loaded, e.g. torch_dtype=torch.bfloat16
|
||||
- a dict, e.g. torch_dtype={"unet": torch.bfloat16, "default": torch.float32}
|
||||
@@ -2111,7 +2098,13 @@ class ModularPipeline(ConfigMixin, PushToHubMixin):
|
||||
`variant`, `revision`, etc.
|
||||
"""
|
||||
|
||||
if isinstance(names, str):
|
||||
if names is None:
|
||||
names = [
|
||||
name
|
||||
for name in self._component_specs.keys()
|
||||
if self._component_specs[name].default_creation_method == "from_pretrained"
|
||||
]
|
||||
elif isinstance(names, str):
|
||||
names = [names]
|
||||
elif not isinstance(names, list):
|
||||
raise ValueError(f"Invalid type for names: {type(names)}")
|
||||
|
||||
@@ -384,14 +384,14 @@ class ModularNode(ConfigMixin):
|
||||
# pass or create a default param dict for each input
|
||||
# e.g. for prompt,
|
||||
# prompt = {
|
||||
# "name": "text_input", # the name of the input in node defination, could be different from the input name in diffusers
|
||||
# "name": "text_input", # the name of the input in node definition, could be different from the input name in diffusers
|
||||
# "label": "Prompt",
|
||||
# "type": "string",
|
||||
# "default": "a bear sitting in a chair drinking a milkshake",
|
||||
# "display": "textarea"}
|
||||
# if type is not specified, it'll be a "custom" param of its own type
|
||||
# e.g. you can pass ModularNode(scheduler = {name :"scheduler"})
|
||||
# it will get this spec in node defination {"scheduler": {"label": "Scheduler", "type": "scheduler", "display": "input"}}
|
||||
# it will get this spec in node definition {"scheduler": {"label": "Scheduler", "type": "scheduler", "display": "input"}}
|
||||
# name can be a dict, in that case, it is part of a "dict" input in mellon nodes, e.g. text_encoder= {name: {"text_encoders": "text_encoder"}}
|
||||
inputs = self.blocks.inputs + self.blocks.intermediate_inputs
|
||||
for inp in inputs:
|
||||
|
||||
@@ -695,7 +695,7 @@ class StableDiffusionXLDenoiseStep(StableDiffusionXLDenoiseLoopWrapper):
|
||||
return (
|
||||
"Denoise step that iteratively denoise the latents. \n"
|
||||
"Its loop logic is defined in `StableDiffusionXLDenoiseLoopWrapper.__call__` method \n"
|
||||
"At each iteration, it runs blocks defined in `sub_blocks` sequencially:\n"
|
||||
"At each iteration, it runs blocks defined in `sub_blocks` sequentially:\n"
|
||||
" - `StableDiffusionXLLoopBeforeDenoiser`\n"
|
||||
" - `StableDiffusionXLLoopDenoiser`\n"
|
||||
" - `StableDiffusionXLLoopAfterDenoiser`\n"
|
||||
@@ -717,7 +717,7 @@ class StableDiffusionXLControlNetDenoiseStep(StableDiffusionXLDenoiseLoopWrapper
|
||||
return (
|
||||
"Denoise step that iteratively denoise the latents with controlnet. \n"
|
||||
"Its loop logic is defined in `StableDiffusionXLDenoiseLoopWrapper.__call__` method \n"
|
||||
"At each iteration, it runs blocks defined in `sub_blocks` sequencially:\n"
|
||||
"At each iteration, it runs blocks defined in `sub_blocks` sequentially:\n"
|
||||
" - `StableDiffusionXLLoopBeforeDenoiser`\n"
|
||||
" - `StableDiffusionXLControlNetLoopDenoiser`\n"
|
||||
" - `StableDiffusionXLLoopAfterDenoiser`\n"
|
||||
@@ -739,7 +739,7 @@ class StableDiffusionXLInpaintDenoiseStep(StableDiffusionXLDenoiseLoopWrapper):
|
||||
return (
|
||||
"Denoise step that iteratively denoise the latents(for inpainting task only). \n"
|
||||
"Its loop logic is defined in `StableDiffusionXLDenoiseLoopWrapper.__call__` method \n"
|
||||
"At each iteration, it runs blocks defined in `sub_blocks` sequencially:\n"
|
||||
"At each iteration, it runs blocks defined in `sub_blocks` sequentially:\n"
|
||||
" - `StableDiffusionXLInpaintLoopBeforeDenoiser`\n"
|
||||
" - `StableDiffusionXLLoopDenoiser`\n"
|
||||
" - `StableDiffusionXLInpaintLoopAfterDenoiser`\n"
|
||||
@@ -761,7 +761,7 @@ class StableDiffusionXLInpaintControlNetDenoiseStep(StableDiffusionXLDenoiseLoop
|
||||
return (
|
||||
"Denoise step that iteratively denoise the latents(for inpainting task only) with controlnet. \n"
|
||||
"Its loop logic is defined in `StableDiffusionXLDenoiseLoopWrapper.__call__` method \n"
|
||||
"At each iteration, it runs blocks defined in `sub_blocks` sequencially:\n"
|
||||
"At each iteration, it runs blocks defined in `sub_blocks` sequentially:\n"
|
||||
" - `StableDiffusionXLInpaintLoopBeforeDenoiser`\n"
|
||||
" - `StableDiffusionXLControlNetLoopDenoiser`\n"
|
||||
" - `StableDiffusionXLInpaintLoopAfterDenoiser`\n"
|
||||
|
||||
@@ -253,7 +253,7 @@ class WanDenoiseStep(WanDenoiseLoopWrapper):
|
||||
return (
|
||||
"Denoise step that iteratively denoise the latents. \n"
|
||||
"Its loop logic is defined in `WanDenoiseLoopWrapper.__call__` method \n"
|
||||
"At each iteration, it runs blocks defined in `sub_blocks` sequencially:\n"
|
||||
"At each iteration, it runs blocks defined in `sub_blocks` sequentially:\n"
|
||||
" - `WanLoopDenoiser`\n"
|
||||
" - `WanLoopAfterDenoiser`\n"
|
||||
"This block supports both text2vid tasks."
|
||||
|
||||
@@ -393,6 +393,7 @@ else:
|
||||
"QwenImageImg2ImgPipeline",
|
||||
"QwenImageInpaintPipeline",
|
||||
"QwenImageEditPipeline",
|
||||
"QwenImageEditInpaintPipeline",
|
||||
"QwenImageControlNetPipeline",
|
||||
]
|
||||
try:
|
||||
@@ -714,6 +715,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
from .pixart_alpha import PixArtAlphaPipeline, PixArtSigmaPipeline
|
||||
from .qwenimage import (
|
||||
QwenImageControlNetPipeline,
|
||||
QwenImageEditInpaintPipeline,
|
||||
QwenImageEditPipeline,
|
||||
QwenImageImg2ImgPipeline,
|
||||
QwenImageInpaintPipeline,
|
||||
|
||||
@@ -613,7 +613,7 @@ def _assign_components_to_devices(
|
||||
|
||||
|
||||
def _get_final_device_map(device_map, pipeline_class, passed_class_obj, init_dict, library, max_memory, **kwargs):
|
||||
# TODO: seperate out different device_map methods when it gets to it.
|
||||
# TODO: separate out different device_map methods when it gets to it.
|
||||
if device_map != "balanced":
|
||||
return device_map
|
||||
# To avoid circular import problem.
|
||||
|
||||
@@ -26,6 +26,7 @@ else:
|
||||
_import_structure["pipeline_qwenimage"] = ["QwenImagePipeline"]
|
||||
_import_structure["pipeline_qwenimage_controlnet"] = ["QwenImageControlNetPipeline"]
|
||||
_import_structure["pipeline_qwenimage_edit"] = ["QwenImageEditPipeline"]
|
||||
_import_structure["pipeline_qwenimage_edit_inpaint"] = ["QwenImageEditInpaintPipeline"]
|
||||
_import_structure["pipeline_qwenimage_img2img"] = ["QwenImageImg2ImgPipeline"]
|
||||
_import_structure["pipeline_qwenimage_inpaint"] = ["QwenImageInpaintPipeline"]
|
||||
|
||||
@@ -39,6 +40,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
from .pipeline_qwenimage import QwenImagePipeline
|
||||
from .pipeline_qwenimage_controlnet import QwenImageControlNetPipeline
|
||||
from .pipeline_qwenimage_edit import QwenImageEditPipeline
|
||||
from .pipeline_qwenimage_edit_inpaint import QwenImageEditInpaintPipeline
|
||||
from .pipeline_qwenimage_img2img import QwenImageImg2ImgPipeline
|
||||
from .pipeline_qwenimage_inpaint import QwenImageInpaintPipeline
|
||||
else:
|
||||
|
||||
@@ -551,6 +551,12 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
|
||||
Function invoked when calling the pipeline for generation.
|
||||
|
||||
Args:
|
||||
image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
|
||||
`Image`, numpy array or tensor representing an image batch to be used as the starting point. For both
|
||||
numpy array and pytorch tensor, the expected value range is between `[0, 1]` If it's a tensor or a list
|
||||
or tensors, the expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a
|
||||
list of arrays, the expected shape should be `(B, H, W, C)` or `(H, W, C)` It can also accept image
|
||||
latents as `image`, but if passing latents directly it is not encoded again.
|
||||
prompt (`str` or `List[str]`, *optional*):
|
||||
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
|
||||
instead.
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -21,9 +21,11 @@ from typing import Dict, Optional, Union
|
||||
|
||||
from .bitsandbytes import BnB4BitDiffusersQuantizer, BnB8BitDiffusersQuantizer
|
||||
from .gguf import GGUFQuantizer
|
||||
from .modelopt import NVIDIAModelOptQuantizer
|
||||
from .quantization_config import (
|
||||
BitsAndBytesConfig,
|
||||
GGUFQuantizationConfig,
|
||||
NVIDIAModelOptConfig,
|
||||
QuantizationConfigMixin,
|
||||
QuantizationMethod,
|
||||
QuantoConfig,
|
||||
@@ -39,6 +41,7 @@ AUTO_QUANTIZER_MAPPING = {
|
||||
"gguf": GGUFQuantizer,
|
||||
"quanto": QuantoQuantizer,
|
||||
"torchao": TorchAoHfQuantizer,
|
||||
"modelopt": NVIDIAModelOptQuantizer,
|
||||
}
|
||||
|
||||
AUTO_QUANTIZATION_CONFIG_MAPPING = {
|
||||
@@ -47,6 +50,7 @@ AUTO_QUANTIZATION_CONFIG_MAPPING = {
|
||||
"gguf": GGUFQuantizationConfig,
|
||||
"quanto": QuantoConfig,
|
||||
"torchao": TorchAoConfig,
|
||||
"modelopt": NVIDIAModelOptConfig,
|
||||
}
|
||||
|
||||
|
||||
@@ -137,6 +141,9 @@ class DiffusersAutoQuantizer:
|
||||
if isinstance(quantization_config, dict):
|
||||
quantization_config = cls.from_dict(quantization_config)
|
||||
|
||||
if isinstance(quantization_config, NVIDIAModelOptConfig):
|
||||
quantization_config.check_model_patching()
|
||||
|
||||
if warning_msg != "":
|
||||
warnings.warn(warning_msg)
|
||||
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
from .modelopt_quantizer import NVIDIAModelOptQuantizer
|
||||
@@ -0,0 +1,190 @@
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Union
|
||||
|
||||
from ...utils import (
|
||||
get_module_from_name,
|
||||
is_accelerate_available,
|
||||
is_nvidia_modelopt_available,
|
||||
is_torch_available,
|
||||
logging,
|
||||
)
|
||||
from ..base import DiffusersQuantizer
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ...models.modeling_utils import ModelMixin
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
if is_accelerate_available():
|
||||
from accelerate.utils import set_module_tensor_to_device
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
|
||||
class NVIDIAModelOptQuantizer(DiffusersQuantizer):
    r"""
    Diffusers Quantizer for TensorRT Model Optimizer

    The quantizer is driven by `self.quantization_config` (an `NVIDIAModelOptConfig`-style object): it reads the
    `modelopt_config`, `forward_loop`, `quant_type`, `modules_to_not_convert` and `disable_conv_quantization`
    attributes from it. All `modelopt` imports are done lazily inside the methods because ModelOpt imports diffusers
    internally and a module-level import would be circular.
    """

    use_keep_in_fp32_modules = True
    requires_calibration = False
    required_packages = ["nvidia_modelopt"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)

    def validate_environment(self, *args, **kwargs):
        """
        Check that nvidia-modelopt is installed and record whether CPU/disk offload was requested.

        Sets `self.offload` to `True` when `device_map` (read from `kwargs`) is a dict that places something on
        `"cpu"` or `"disk"` and the model is not pre-quantized; otherwise `self.offload` stays `False`.

        Raises:
            ImportError: if nvidia-modelopt is not available.
            ValueError: if CPU/disk offload is requested for a pre-quantized model.
        """
        if not is_nvidia_modelopt_available():
            raise ImportError(
                "Loading an nvidia-modelopt quantized model requires nvidia-modelopt library (`pip install nvidia-modelopt`)"
            )

        self.offload = False

        device_map = kwargs.get("device_map", None)
        if not isinstance(device_map, dict):
            return

        placements = device_map.values()
        if "cpu" in placements or "disk" in placements:
            if self.pre_quantized:
                raise ValueError(
                    "You are attempting to perform cpu/disk offload with a pre-quantized modelopt model. "
                    "This is not supported yet. Please remove the CPU or disk device from the `device_map` argument."
                )
            self.offload = True

    def check_if_quantized_param(
        self,
        model: "ModelMixin",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: Dict[str, Any],
        **kwargs,
    ):
        """
        Tell the loader whether `param_name` must go through `create_quantized_param`.

        Returns `True` for every parameter of a pre-quantized model; otherwise `True` only when the owning module is
        reported as quantized by ModelOpt and the tensor name contains `"weight"`.
        """
        # ModelOpt imports diffusers internally. This is here to prevent circular imports
        from modelopt.torch.quantization.utils import is_quantized

        module, tensor_name = get_module_from_name(model, param_name)
        if self.pre_quantized:
            return True
        return bool(is_quantized(module) and "weight" in tensor_name)

    def create_quantized_param(
        self,
        model: "ModelMixin",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        *args,
        **kwargs,
    ):
        """
        Create the quantized parameter by calling .calibrate() after setting it to the module.

        For a pre-quantized model the tensor is only moved to `target_device` and installed on the module as-is. For
        on-the-fly quantization the tensor is placed with `dtype` (read from `kwargs`, default `torch.float32`), the
        owning module is calibrated and compressed, and its weight is frozen.
        """
        # ModelOpt imports diffusers internally. This is here to prevent circular imports
        import modelopt.torch.quantization as mtq

        dtype = kwargs.get("dtype", torch.float32)
        module, tensor_name = get_module_from_name(model, param_name)

        if self.pre_quantized:
            module._parameters[tensor_name] = torch.nn.Parameter(param_value.to(device=target_device))
            return

        set_module_tensor_to_device(model, param_name, target_device, param_value, dtype)
        mtq.calibrate(
            module, self.quantization_config.modelopt_config["algorithm"], self.quantization_config.forward_loop
        )
        mtq.compress(module)
        module.weight.requires_grad = False

    def adjust_max_memory(self, max_memory: Dict[str, Union[int, str]]) -> Dict[str, Union[int, str]]:
        """Return a copy of `max_memory` with every budget scaled to 90% of its value."""
        # NOTE(review): the multiplication assumes numeric values even though the annotation also admits `str`.
        return {key: val * 0.90 for key, val in max_memory.items()}

    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        """Return `torch.float8_e4m3fn` when the configured `quant_type` is exactly `"FP8"`, else `target_dtype`."""
        if self.quantization_config.quant_type == "FP8":
            target_dtype = torch.float8_e4m3fn
        return target_dtype

    def update_torch_dtype(self, torch_dtype: "torch.dtype" = None) -> "torch.dtype":
        """Default `torch_dtype` to `torch.float32` (with an info log) when the caller did not specify one."""
        if torch_dtype is None:
            logger.info("You did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.")
            torch_dtype = torch.float32
        return torch_dtype

    def get_conv_param_names(self, model: "ModelMixin") -> List[str]:
        """
        Get parameter names for all convolutional layers in a HuggingFace ModelMixin. Includes Conv1d/2d/3d and
        ConvTranspose1d/2d/3d.

        Names are built as `"<module name>.<parameter name>"` from `model.named_modules()` and only cover parameters
        owned directly by each convolution module.
        """
        conv_types = (
            nn.Conv1d,
            nn.Conv2d,
            nn.Conv3d,
            nn.ConvTranspose1d,
            nn.ConvTranspose2d,
            nn.ConvTranspose3d,
        )

        return [
            f"{name}.{param_name}"
            for name, module in model.named_modules()
            if isinstance(module, conv_types)
            for param_name, _ in module.named_parameters(recurse=False)
        ]

    def _process_model_before_weight_loading(
        self,
        model: "ModelMixin",
        device_map,
        keep_in_fp32_modules: Union[List[str], None] = None,
        **kwargs,
    ):
        """
        Insert ModelOpt quantizers into `model` before its weights are loaded.

        Does nothing for pre-quantized models. Otherwise the exclusion list is assembled from the config's
        `modules_to_not_convert`, `keep_in_fp32_modules` and (if `disable_conv_quantization` is set) all convolution
        parameter names; each entry is disabled in the ModelOpt `quant_cfg` through a `*<name>*` wildcard. The final
        list is written back to the config, the quantize mode is applied, and the config is attached to
        `model.config`.
        """
        # ModelOpt imports diffusers internally. This is here to prevent circular imports
        import modelopt.torch.opt as mto

        if self.pre_quantized:
            return

        configured = self.quantization_config.modules_to_not_convert
        if configured is None:
            modules_to_not_convert = []
        elif isinstance(configured, str):
            modules_to_not_convert = [configured]
        else:
            # Work on a copy so the list object supplied by the user is not extended behind their back.
            modules_to_not_convert = list(configured)

        modules_to_not_convert.extend(keep_in_fp32_modules or [])
        if self.quantization_config.disable_conv_quantization:
            modules_to_not_convert.extend(self.get_conv_param_names(model))

        quant_cfg = self.quantization_config.modelopt_config["quant_cfg"]
        for module in modules_to_not_convert:
            quant_cfg["*" + module + "*"] = {"enable": False}

        self.quantization_config.modules_to_not_convert = modules_to_not_convert
        mto.apply_mode(model, mode=[("quantize", self.quantization_config.modelopt_config)])
        model.config.quantization_config = self.quantization_config

    def _process_model_after_weight_loading(self, model, **kwargs):
        """
        Remove the ModelOpt state from every submodule that carries one, leaving only the state on `model` itself.

        Pre-quantized models are returned untouched.
        """
        # ModelOpt imports diffusers internally. This is here to prevent circular imports
        from modelopt.torch.opt import ModeloptStateManager

        if self.pre_quantized:
            return model

        for _, submodule in model.named_modules():
            if submodule is not model and hasattr(submodule, ModeloptStateManager._state_key):
                ModeloptStateManager.remove_state(submodule)

        return model

    @property
    def is_trainable(self):
        return True

    @property
    def is_serializable(self):
        # Accessing this property also emits the config's "saving" patching warning when applicable.
        self.quantization_config.check_model_patching(operation="saving")
        return True
|
||||
@@ -25,10 +25,11 @@ import importlib.metadata
|
||||
import inspect
|
||||
import json
|
||||
import os
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from functools import partial
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
from packaging import version
|
||||
|
||||
@@ -46,6 +47,7 @@ class QuantizationMethod(str, Enum):
|
||||
GGUF = "gguf"
|
||||
TORCHAO = "torchao"
|
||||
QUANTO = "quanto"
|
||||
MODELOPT = "modelopt"
|
||||
|
||||
|
||||
if is_torchao_available():
|
||||
@@ -268,7 +270,14 @@ class BitsAndBytesConfig(QuantizationConfigMixin):
|
||||
if bnb_4bit_quant_storage is None:
|
||||
self.bnb_4bit_quant_storage = torch.uint8
|
||||
elif isinstance(bnb_4bit_quant_storage, str):
|
||||
if bnb_4bit_quant_storage not in ["float16", "float32", "int8", "uint8", "float64", "bfloat16"]:
|
||||
if bnb_4bit_quant_storage not in [
|
||||
"float16",
|
||||
"float32",
|
||||
"int8",
|
||||
"uint8",
|
||||
"float64",
|
||||
"bfloat16",
|
||||
]:
|
||||
raise ValueError(
|
||||
"`bnb_4bit_quant_storage` must be a valid string (one of 'float16', 'float32', 'int8', 'uint8', 'float64', 'bfloat16') "
|
||||
)
|
||||
@@ -479,7 +488,12 @@ class TorchAoConfig(QuantizationConfigMixin):
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self, quant_type: str, modules_to_not_convert: Optional[List[str]] = None, **kwargs) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
quant_type: str,
|
||||
modules_to_not_convert: Optional[List[str]] = None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
self.quant_method = QuantizationMethod.TORCHAO
|
||||
self.quant_type = quant_type
|
||||
self.modules_to_not_convert = modules_to_not_convert
|
||||
@@ -724,3 +738,194 @@ class QuantoConfig(QuantizationConfigMixin):
|
||||
accepted_weights = ["float8", "int8", "int4", "int2"]
|
||||
if self.weights_dtype not in accepted_weights:
|
||||
raise ValueError(f"Only support weights in {accepted_weights} but found {self.weights_dtype}")
|
||||
|
||||
|
||||
@dataclass
class NVIDIAModelOptConfig(QuantizationConfigMixin):
    """This is a config class to use nvidia modelopt for quantization.

    Args:
        quant_type (`str`):
            The type of quantization we want to use, written as `<weight type>` or
            `<weight type>_<activation type>`; for example `FP8_INT8` uses FP8 for the weights and INT8 for the
            activations. Each component must be one of:
                - FP8
                - INT8
                - INT4
                - NF4
                - NVFP4
            An unsupported weight type falls back to FP8 and an unsupported activation type is dropped, both with a
            warning.
        modules_to_not_convert (`List[str]`, *optional*, default to `None`):
            The list of modules to not quantize, useful for quantizing models that explicitly require to have some
            modules left in their original precision.
        weight_only (`bool`, *optional*, default to `True`):
            If set to `True`, the quantization will be applied only to the weights of the model.
        channel_quantize (`int`, *optional*, default to `None`):
            The channel quantization axis, useful for quantizing models across different axes.
        block_quantize (`int`, *optional*, default to `None`):
            The block size, useful to further quantize each channel/axes into blocks.
        scale_channel_quantize (`int`, *optional*, default to `None`):
            The scale channel quantization axis, useful for quantizing calculated scale across different axes.
        scale_block_quantize (`int`, *optional*, default to `None`):
            The scale block size, useful for quantizing each scale channel/axes into blocks.
        algorithm (`str`, *optional*, default to `"max"`):
            The algorithm to use for quantization, currently only supports `"max"`.
        forward_loop (`Callable`, *optional*, default to `None`):
            The forward loop function to use for calibration during quantization.
        modelopt_config (`dict`, *optional*, default to `None`):
            The modelopt config, useful for passing custom configs to modelopt. When given (and non-empty) it is
            used as-is instead of the config derived from `quant_type`.
        disable_conv_quantization (`bool`, *optional*, default to `False`):
            If set to `True`, the quantization will be disabled for convolutional layers.
        kwargs (`Dict[str, Any]`, *optional*):
            Additional keyword arguments. They are accepted but not read by this constructor.
    """

    # Bit width per quantization type, in the form stored under modelopt's `num_bits` key.
    quanttype_to_numbits = {
        "FP8": (4, 3),
        "INT8": 8,
        "INT4": 4,
        "NF4": 4,
        "NVFP4": (2, 1),
    }
    # Bit width of the scaling factors, for the types that quantize their scales.
    quanttype_to_scalingbits = {
        "NF4": 8,
        "NVFP4": (4, 3),
    }

    def __init__(
        self,
        quant_type: str,
        modules_to_not_convert: Optional[List[str]] = None,
        weight_only: bool = True,
        channel_quantize: Optional[int] = None,
        block_quantize: Optional[int] = None,
        scale_channel_quantize: Optional[int] = None,
        scale_block_quantize: Optional[int] = None,
        algorithm: str = "max",
        forward_loop: Optional[Callable] = None,
        modelopt_config: Optional[dict] = None,
        disable_conv_quantization: bool = False,
        **kwargs,
    ) -> None:
        self.quant_method = QuantizationMethod.MODELOPT
        # Sets `self.quant_type` to the validated value.
        self._normalize_quant_type(quant_type)
        self.modules_to_not_convert = modules_to_not_convert
        self.weight_only = weight_only
        self.channel_quantize = channel_quantize
        self.block_quantize = block_quantize
        self.calib_cfg = {
            "method": algorithm,
            # add more options here if needed
        }
        self.forward_loop = forward_loop
        self.scale_channel_quantize = scale_channel_quantize
        self.scale_block_quantize = scale_block_quantize
        # Must come after every attribute above: the derived config reads them.
        self.modelopt_config = self.get_config_from_quant_type() if not modelopt_config else modelopt_config
        self.disable_conv_quantization = disable_conv_quantization

    def check_model_patching(self, operation: str = "loading"):
        """
        Warn when ModelOpt's HuggingFace checkpointing patch has not been enabled.

        Args:
            operation (`str`, *optional*, default to `"loading"`):
                Verb inserted in the warning text (for example `"loading"` or `"saving"`).
        """
        # ModelOpt imports diffusers internally. This is here to prevent circular imports
        from modelopt.torch.opt.plugins.huggingface import _PATCHED_CLASSES

        if not _PATCHED_CLASSES:
            warning_msg = (
                f"Not {operation} weights in modelopt format. This might cause unreliable behavior. "
                "Please make sure to run the following code before loading/saving model weights:\n\n"
                "    from modelopt.torch.opt import enable_huggingface_checkpointing\n"
                "    enable_huggingface_checkpointing()\n"
            )
            warnings.warn(warning_msg)

    def _normalize_quant_type(self, quant_type: str) -> str:
        """
        Validates and normalizes the quantization type string.

        Splits the quant_type on `_` into weight and activation components, verifies them against the supported
        types, and stores the result in `self.quant_type`. A string with more than two components is replaced by
        `FP8`; an unsupported weight type is replaced by `FP8`; an unsupported activation type is dropped.

        Args:
            quant_type (str): The input quantization type string (e.g., 'FP8_INT8').

        Returns:
            str: A valid quantization type string (e.g., 'FP8_INT8' or 'FP8').
        """
        supported = NVIDIAModelOptConfig.quanttype_to_numbits
        parts = quant_type.split("_")
        w_type = parts[0]
        act_type = parts[1] if len(parts) > 1 else None

        if len(parts) > 2:
            logger.warning(f"Quantization type {quant_type} is not supported. Picking FP8 as default")
            w_type = "FP8"
            act_type = None
        else:
            if w_type not in supported:
                logger.warning(f"Weight Quantization type {w_type} is not supported. Picking FP8 as default")
                w_type = "FP8"
            if act_type is not None and act_type not in supported:
                logger.warning(
                    f"Activation Quantization type {act_type} is not supported. Dropping it from `quant_type`"
                )
                act_type = None

        self.quant_type = w_type if act_type is None else f"{w_type}_{act_type}"
        return self.quant_type

    def get_config_from_quant_type(self) -> Dict[str, Any]:
        """
        Get the config from the quantization type.

        Builds a modelopt config dict with a `quant_cfg` section (per-quantizer settings keyed by wildcard pattern)
        and an `algorithm` section (`self.calib_cfg`), then fills in bit widths, axis/block sizes and scale settings
        from the attributes set in `__init__`.
        """
        import modelopt.torch.quantization as mtq

        numbits = NVIDIAModelOptConfig.quanttype_to_numbits
        scalingbits = NVIDIAModelOptConfig.quanttype_to_scalingbits
        default_disabled = mtq.config._default_disabled_quantizer_cfg

        base_config = {
            "quant_cfg": {
                "*weight_quantizer": {"fake_quant": False},
                "*input_quantizer": {},
                "*output_quantizer": {"enable": False},
                "*q_bmm_quantizer": {},
                "*k_bmm_quantizer": {},
                "*v_bmm_quantizer": {},
                "*softmax_quantizer": {},
                **default_disabled,
            },
            "algorithm": self.calib_cfg,
        }
        quant_cfg = base_config["quant_cfg"]

        if self.weight_only:
            # Every still-empty non-weight entry is switched off.
            for pattern, cfg in quant_cfg.items():
                if "*weight_quantizer" not in pattern and not cfg:
                    cfg["enable"] = False

        parts = self.quant_type.split("_")
        w_type = parts[0]
        act_type = parts[1].replace("A", "") if len(parts) > 1 else None

        for pattern, cfg in quant_cfg.items():
            if pattern in default_disabled or "enable" in cfg:
                continue
            # The input quantizer uses the activation type when one was given; everything else (including the input
            # quantizer without an activation type) uses the weight type.
            if pattern == "*input_quantizer" and act_type is not None:
                cfg["num_bits"] = numbits[act_type]
            else:
                cfg["num_bits"] = numbits[w_type]

        weight_cfg = quant_cfg["*weight_quantizer"]
        input_cfg = quant_cfg["*input_quantizer"]

        if self.block_quantize is not None and self.channel_quantize is not None:
            weight_cfg["block_sizes"] = {self.channel_quantize: self.block_quantize}
            input_cfg["block_sizes"] = {
                self.channel_quantize: self.block_quantize,
                "type": "dynamic",
            }
        elif self.channel_quantize is not None:
            weight_cfg["axis"] = self.channel_quantize
            input_cfg["axis"] = self.channel_quantize
            input_cfg["type"] = "dynamic"

        # Only fixed scaling sizes are supported for now in modelopt
        # NOTE(review): the updates below index `block_sizes`, which is only created when both `block_quantize` and
        # `channel_quantize` are set; scale options without block options raise `KeyError` - confirm this is intended.
        if self.scale_channel_quantize is not None and self.scale_block_quantize is not None:
            if w_type in scalingbits:
                weight_cfg["block_sizes"].update(
                    {
                        "scale_bits": scalingbits[w_type],
                        "scale_block_sizes": {self.scale_channel_quantize: self.scale_block_quantize},
                    }
                )
            if act_type and act_type in scalingbits:
                input_cfg["block_sizes"].update(
                    {
                        "scale_bits": scalingbits[act_type],
                        "scale_block_sizes": {self.scale_channel_quantize: self.scale_block_quantize},
                    }
                )

        return base_config
|
||||
|
||||
@@ -89,6 +89,8 @@ from .import_utils import (
|
||||
is_matplotlib_available,
|
||||
is_nltk_available,
|
||||
is_note_seq_available,
|
||||
is_nvidia_modelopt_available,
|
||||
is_nvidia_modelopt_version,
|
||||
is_onnx_available,
|
||||
is_opencv_available,
|
||||
is_optimum_quanto_available,
|
||||
|
||||
@@ -45,6 +45,8 @@ DIFFUSERS_ATTN_BACKEND = os.getenv("DIFFUSERS_ATTN_BACKEND", "native")
|
||||
DIFFUSERS_ATTN_CHECKS = os.getenv("DIFFUSERS_ATTN_CHECKS", "0") in ENV_VARS_TRUE_VALUES
|
||||
DEFAULT_HF_PARALLEL_LOADING_WORKERS = 8
|
||||
HF_ENABLE_PARALLEL_LOADING = os.environ.get("HF_ENABLE_PARALLEL_LOADING", "").upper() in ENV_VARS_TRUE_VALUES
|
||||
DIFFUSERS_DISABLE_REMOTE_CODE = os.getenv("DIFFUSERS_DISABLE_REMOTE_CODE", "false").lower() in ENV_VARS_TRUE_VALUES
|
||||
DIFFUSERS_ENABLE_HUB_KERNELS = os.environ.get("DIFFUSERS_ENABLE_HUB_KERNELS", "").upper() in ENV_VARS_TRUE_VALUES
|
||||
|
||||
# Below should be `True` if the current version of `peft` and `transformers` are compatible with
|
||||
# PEFT backend. Will automatically fall back to PEFT backend if the correct versions of the libraries are
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
# This file is autogenerated by the command `make fix-copies`, do not edit.
|
||||
from ..utils import DummyObject, requires_backends
|
||||
|
||||
|
||||
class NVIDIAModelOptConfig(metaclass=DummyObject):
|
||||
_backends = ["nvidia_modelopt"]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
requires_backends(self, ["nvidia_modelopt"])
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, *args, **kwargs):
|
||||
requires_backends(cls, ["nvidia_modelopt"])
|
||||
|
||||
@classmethod
|
||||
def from_pretrained(cls, *args, **kwargs):
|
||||
requires_backends(cls, ["nvidia_modelopt"])
|
||||
@@ -1772,6 +1772,21 @@ class QwenImageControlNetPipeline(metaclass=DummyObject):
|
||||
requires_backends(cls, ["torch", "transformers"])
|
||||
|
||||
|
||||
class QwenImageEditInpaintPipeline(metaclass=DummyObject):
|
||||
_backends = ["torch", "transformers"]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
requires_backends(self, ["torch", "transformers"])
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, *args, **kwargs):
|
||||
requires_backends(cls, ["torch", "transformers"])
|
||||
|
||||
@classmethod
|
||||
def from_pretrained(cls, *args, **kwargs):
|
||||
requires_backends(cls, ["torch", "transformers"])
|
||||
|
||||
|
||||
class QwenImageEditPipeline(metaclass=DummyObject):
|
||||
_backends = ["torch", "transformers"]
|
||||
|
||||
|
||||
@@ -20,7 +20,6 @@ import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import signal
|
||||
import sys
|
||||
import threading
|
||||
from pathlib import Path
|
||||
@@ -34,6 +33,7 @@ from packaging import version
|
||||
|
||||
from .. import __version__
|
||||
from . import DIFFUSERS_DYNAMIC_MODULE_NAME, HF_MODULES_CACHE, logging
|
||||
from .constants import DIFFUSERS_DISABLE_REMOTE_CODE
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
@@ -159,52 +159,25 @@ def check_imports(filename):
|
||||
return get_relative_imports(filename)
|
||||
|
||||
|
||||
def _raise_timeout_error(signum, frame):
|
||||
raise ValueError(
|
||||
"Loading this model requires you to execute custom code contained in the model repository on your local "
|
||||
"machine. Please set the option `trust_remote_code=True` to permit loading of this model."
|
||||
)
|
||||
|
||||
|
||||
def resolve_trust_remote_code(trust_remote_code, model_name, has_remote_code):
|
||||
if trust_remote_code is None:
|
||||
if has_remote_code and TIME_OUT_REMOTE_CODE > 0:
|
||||
prev_sig_handler = None
|
||||
try:
|
||||
prev_sig_handler = signal.signal(signal.SIGALRM, _raise_timeout_error)
|
||||
signal.alarm(TIME_OUT_REMOTE_CODE)
|
||||
while trust_remote_code is None:
|
||||
answer = input(
|
||||
f"The repository for {model_name} contains custom code which must be executed to correctly "
|
||||
f"load the model. You can inspect the repository content at https://hf.co/{model_name}.\n"
|
||||
f"You can avoid this prompt in future by passing the argument `trust_remote_code=True`.\n\n"
|
||||
f"Do you wish to run the custom code? [y/N] "
|
||||
)
|
||||
if answer.lower() in ["yes", "y", "1"]:
|
||||
trust_remote_code = True
|
||||
elif answer.lower() in ["no", "n", "0", ""]:
|
||||
trust_remote_code = False
|
||||
signal.alarm(0)
|
||||
except Exception:
|
||||
# OS which does not support signal.SIGALRM
|
||||
raise ValueError(
|
||||
f"The repository for {model_name} contains custom code which must be executed to correctly "
|
||||
f"load the model. You can inspect the repository content at https://hf.co/{model_name}.\n"
|
||||
f"Please pass the argument `trust_remote_code=True` to allow custom code to be run."
|
||||
)
|
||||
finally:
|
||||
if prev_sig_handler is not None:
|
||||
signal.signal(signal.SIGALRM, prev_sig_handler)
|
||||
signal.alarm(0)
|
||||
elif has_remote_code:
|
||||
# For the CI which puts the timeout at 0
|
||||
_raise_timeout_error(None, None)
|
||||
trust_remote_code = trust_remote_code and not DIFFUSERS_DISABLE_REMOTE_CODE
|
||||
if DIFFUSERS_DISABLE_REMOTE_CODE:
|
||||
logger.warning(
|
||||
"Downloading remote code is disabled globally via the DIFFUSERS_DISABLE_REMOTE_CODE environment variable. Ignoring `trust_remote_code`."
|
||||
)
|
||||
|
||||
if has_remote_code and not trust_remote_code:
|
||||
raise ValueError(
|
||||
f"Loading {model_name} requires you to execute the configuration file in that"
|
||||
" repo on your local machine. Make sure you have read the code there to avoid malicious use, then"
|
||||
" set the option `trust_remote_code=True` to remove this error."
|
||||
error_msg = f"The repository for {model_name} contains custom code. "
|
||||
error_msg += (
|
||||
"Downloading remote code is disabled globally via the DIFFUSERS_DISABLE_REMOTE_CODE environment variable."
|
||||
if DIFFUSERS_DISABLE_REMOTE_CODE
|
||||
else "Pass `trust_remote_code=True` to allow loading remote code modules."
|
||||
)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
elif has_remote_code and trust_remote_code:
|
||||
logger.warning(
|
||||
f"`trust_remote_code` is enabled. Downloading code from {model_name}. Please ensure you trust the contents of this repository"
|
||||
)
|
||||
|
||||
return trust_remote_code
|
||||
|
||||
@@ -226,6 +226,7 @@ _sageattention_available, _sageattention_version = _is_package_available("sageat
|
||||
_flash_attn_available, _flash_attn_version = _is_package_available("flash_attn")
|
||||
_flash_attn_3_available, _flash_attn_3_version = _is_package_available("flash_attn_3")
|
||||
_kornia_available, _kornia_version = _is_package_available("kornia")
|
||||
_nvidia_modelopt_available, _nvidia_modelopt_version = _is_package_available("modelopt", get_dist_name=True)
|
||||
|
||||
|
||||
def is_torch_available():
|
||||
@@ -364,6 +365,10 @@ def is_optimum_quanto_available():
|
||||
return _optimum_quanto_available
|
||||
|
||||
|
||||
def is_nvidia_modelopt_available():
    """Return the module-level availability flag computed for the `modelopt` package at import time."""
    return _nvidia_modelopt_available
|
||||
|
||||
|
||||
def is_timm_available():
|
||||
return _timm_available
|
||||
|
||||
@@ -830,6 +835,21 @@ def is_optimum_quanto_version(operation: str, version: str):
|
||||
return compare_versions(parse(_optimum_quanto_version), operation, version)
|
||||
|
||||
|
||||
def is_nvidia_modelopt_version(operation: str, version: str):
    """
    Check the installed Nvidia ModelOpt version against a reference version.

    Args:
        operation (`str`):
            Comparison operator written as a string, e.g. `">"` or `"<="`.
        version (`str`):
            The reference version string to compare with.

    Returns `False` when Nvidia ModelOpt is not available; otherwise the result of the comparison.
    """
    if _nvidia_modelopt_available:
        installed = parse(_nvidia_modelopt_version)
        return compare_versions(installed, operation, version)
    return False
|
||||
|
||||
|
||||
def is_xformers_version(operation: str, version: str):
|
||||
"""
|
||||
Compares the current xformers version to a given reference with an operation.
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
from ..utils import get_logger
|
||||
from .import_utils import is_kernels_available
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
_DEFAULT_HUB_ID_FA3 = "kernels-community/flash-attn3"
|
||||
|
||||
|
||||
def _get_fa3_from_hub():
    """
    Fetch the flash-attention-3 kernel named by `_DEFAULT_HUB_ID_FA3` from the Hub.

    Returns `None` when the `kernels` package is unavailable. Any failure while fetching is logged and re-raised.
    """
    if not is_kernels_available():
        return None

    from kernels import get_kernel

    try:
        # TODO: temporary revision for now. Remove when merged upstream into `main`.
        return get_kernel(_DEFAULT_HUB_ID_FA3, revision="fake-ops-return-probs")
    except Exception as e:
        logger.error(f"An error occurred while fetching kernel '{_DEFAULT_HUB_ID_FA3}' from the Hub: {e}")
        raise
|
||||
@@ -38,6 +38,7 @@ from .import_utils import (
|
||||
is_gguf_available,
|
||||
is_kernels_available,
|
||||
is_note_seq_available,
|
||||
is_nvidia_modelopt_available,
|
||||
is_onnx_available,
|
||||
is_opencv_available,
|
||||
is_optimum_quanto_available,
|
||||
@@ -66,7 +67,10 @@ else:
|
||||
global_rng = random.Random()
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# Emitted once at import time: this module is kept only as a deprecated shim.
logger.warning(
    "`diffusers.utils.testing_utils` is deprecated and will be removed in a future version. "
    "Determinism and device backend utilities have been moved to `diffusers.utils.torch_utils`."
)
|
||||
_required_peft_version = is_peft_available() and version.parse(
|
||||
version.parse(importlib.metadata.version("peft")).base_version
|
||||
) > version.parse("0.5")
|
||||
@@ -635,6 +639,18 @@ def require_torchao_version_greater_or_equal(torchao_version):
|
||||
return decorator
|
||||
|
||||
|
||||
def require_modelopt_version_greater_or_equal(modelopt_version):
    """
    Decorator factory that skips a test unless nvidia-modelopt is available at version `modelopt_version` or newer.

    Only the base version of the installed package is compared, so pre-release/dev suffixes are ignored.
    """

    def decorator(test_case):
        correct_nvidia_modelopt_version = False
        if is_nvidia_modelopt_available():
            try:
                installed = importlib.metadata.version("modelopt")
            except importlib.metadata.PackageNotFoundError:
                # The import name (`modelopt`) is not necessarily the distribution name; fall back to the name the
                # package is installed under with pip.
                installed = importlib.metadata.version("nvidia-modelopt")
            correct_nvidia_modelopt_version = version.parse(
                version.parse(installed).base_version
            ) >= version.parse(modelopt_version)
        return unittest.skipUnless(
            correct_nvidia_modelopt_version,
            f"Test requires modelopt with version greater than or equal to {modelopt_version}.",
        )(test_case)

    return decorator
|
||||
|
||||
|
||||
def require_kernels_version_greater_or_equal(kernels_version):
|
||||
def decorator(test_case):
|
||||
correct_kernels_version = is_kernels_available() and version.parse(
|
||||
@@ -801,10 +817,9 @@ def export_to_ply(mesh, output_ply_path: str = None):
|
||||
f.write(format.pack(*vertex))
|
||||
|
||||
if faces is not None:
|
||||
format = struct.Struct("<B3I")
|
||||
for tri in faces.tolist():
|
||||
f.write(format.pack(len(tri), *tri))
|
||||
|
||||
format = struct.Struct("<B3I")
|
||||
return output_ply_path
|
||||
|
||||
|
||||
@@ -1144,23 +1159,23 @@ def enable_full_determinism():
|
||||
Helper function for reproducible behavior during distributed training. See
|
||||
- https://pytorch.org/docs/stable/notes/randomness.html for pytorch
|
||||
"""
|
||||
# Enable PyTorch deterministic mode. This potentially requires either the environment
|
||||
# variable 'CUDA_LAUNCH_BLOCKING' or 'CUBLAS_WORKSPACE_CONFIG' to be set,
|
||||
# depending on the CUDA version, so we set them both here
|
||||
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
|
||||
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
|
||||
torch.use_deterministic_algorithms(True)
|
||||
from .torch_utils import enable_full_determinism as _enable_full_determinism
|
||||
|
||||
# Enable CUDNN deterministic mode
|
||||
torch.backends.cudnn.deterministic = True
|
||||
torch.backends.cudnn.benchmark = False
|
||||
torch.backends.cuda.matmul.allow_tf32 = False
|
||||
logger.warning(
|
||||
"enable_full_determinism has been moved to diffusers.utils.torch_utils. "
|
||||
"Importing from diffusers.utils.testing_utils is deprecated and will be removed in a future version."
|
||||
)
|
||||
return _enable_full_determinism()
|
||||
|
||||
|
||||
def disable_full_determinism():
|
||||
os.environ["CUDA_LAUNCH_BLOCKING"] = "0"
|
||||
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ""
|
||||
torch.use_deterministic_algorithms(False)
|
||||
from .torch_utils import disable_full_determinism as _disable_full_determinism
|
||||
|
||||
logger.warning(
|
||||
"disable_full_determinism has been moved to diffusers.utils.torch_utils. "
|
||||
"Importing from diffusers.utils.testing_utils is deprecated and will be removed in a future version."
|
||||
)
|
||||
return _disable_full_determinism()
|
||||
|
||||
|
||||
# Utils for custom and alternative accelerator devices
|
||||
@@ -1282,43 +1297,85 @@ def _device_agnostic_dispatch(device: str, dispatch_table: Dict[str, Callable],
|
||||
|
||||
# These are callables which automatically dispatch the function specific to the accelerator
|
||||
def backend_manual_seed(device: str, seed: int):
|
||||
return _device_agnostic_dispatch(device, BACKEND_MANUAL_SEED, seed)
|
||||
from .torch_utils import backend_manual_seed as _backend_manual_seed
|
||||
|
||||
logger.warning(
|
||||
"backend_manual_seed has been moved to diffusers.utils.torch_utils. "
|
||||
"diffusers.utils.testing_utils is deprecated and will be removed in a future version."
|
||||
)
|
||||
return _backend_manual_seed(device, seed)
|
||||
|
||||
|
||||
def backend_synchronize(device: str):
|
||||
return _device_agnostic_dispatch(device, BACKEND_SYNCHRONIZE)
|
||||
from .torch_utils import backend_synchronize as _backend_synchronize
|
||||
|
||||
logger.warning(
|
||||
"backend_synchronize has been moved to diffusers.utils.torch_utils. "
|
||||
"diffusers.utils.testing_utils is deprecated and will be removed in a future version."
|
||||
)
|
||||
return _backend_synchronize(device)
|
||||
|
||||
|
||||
def backend_empty_cache(device: str):
|
||||
return _device_agnostic_dispatch(device, BACKEND_EMPTY_CACHE)
|
||||
from .torch_utils import backend_empty_cache as _backend_empty_cache
|
||||
|
||||
logger.warning(
|
||||
"backend_empty_cache has been moved to diffusers.utils.torch_utils. "
|
||||
"diffusers.utils.testing_utils is deprecated and will be removed in a future version."
|
||||
)
|
||||
return _backend_empty_cache(device)
|
||||
|
||||
|
||||
def backend_device_count(device: str):
|
||||
return _device_agnostic_dispatch(device, BACKEND_DEVICE_COUNT)
|
||||
from .torch_utils import backend_device_count as _backend_device_count
|
||||
|
||||
logger.warning(
|
||||
"backend_device_count has been moved to diffusers.utils.torch_utils. "
|
||||
"diffusers.utils.testing_utils is deprecated and will be removed in a future version."
|
||||
)
|
||||
return _backend_device_count(device)
|
||||
|
||||
|
||||
def backend_reset_peak_memory_stats(device: str):
|
||||
return _device_agnostic_dispatch(device, BACKEND_RESET_PEAK_MEMORY_STATS)
|
||||
from .torch_utils import backend_reset_peak_memory_stats as _backend_reset_peak_memory_stats
|
||||
|
||||
logger.warning(
|
||||
"backend_reset_peak_memory_stats has been moved to diffusers.utils.torch_utils. "
|
||||
"diffusers.utils.testing_utils is deprecated and will be removed in a future version."
|
||||
)
|
||||
return _backend_reset_peak_memory_stats(device)
|
||||
|
||||
|
||||
def backend_reset_max_memory_allocated(device: str):
|
||||
return _device_agnostic_dispatch(device, BACKEND_RESET_MAX_MEMORY_ALLOCATED)
|
||||
from .torch_utils import backend_reset_max_memory_allocated as _backend_reset_max_memory_allocated
|
||||
|
||||
logger.warning(
|
||||
"backend_reset_max_memory_allocated has been moved to diffusers.utils.torch_utils. "
|
||||
"diffusers.utils.testing_utils is deprecated and will be removed in a future version."
|
||||
)
|
||||
return _backend_reset_max_memory_allocated(device)
|
||||
|
||||
|
||||
def backend_max_memory_allocated(device: str):
|
||||
return _device_agnostic_dispatch(device, BACKEND_MAX_MEMORY_ALLOCATED)
|
||||
from .torch_utils import backend_max_memory_allocated as _backend_max_memory_allocated
|
||||
|
||||
logger.warning(
|
||||
"backend_max_memory_allocated has been moved to diffusers.utils.torch_utils. "
|
||||
"diffusers.utils.testing_utils is deprecated and will be removed in a future version."
|
||||
)
|
||||
return _backend_max_memory_allocated(device)
|
||||
|
||||
|
||||
# These are callables which return boolean behaviour flags and can be used to specify some
|
||||
# device agnostic alternative where the feature is unsupported.
|
||||
def backend_supports_training(device: str):
|
||||
if not is_torch_available():
|
||||
return False
|
||||
from .torch_utils import backend_supports_training as _backend_supports_training
|
||||
|
||||
if device not in BACKEND_SUPPORTS_TRAINING:
|
||||
device = "default"
|
||||
|
||||
return BACKEND_SUPPORTS_TRAINING[device]
|
||||
logger.warning(
|
||||
"backend_supports_training has been moved to diffusers.utils.torch_utils. "
|
||||
"diffusers.utils.testing_utils is deprecated and will be removed in a future version."
|
||||
)
|
||||
return _backend_supports_training(device)
|
||||
|
||||
|
||||
# Guard for when Torch is not available
|
||||
|
||||
@@ -16,7 +16,8 @@ PyTorch utilities: Utilities related to PyTorch
|
||||
"""
|
||||
|
||||
import functools
|
||||
from typing import List, Optional, Tuple, Union
|
||||
import os
|
||||
from typing import Callable, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from . import logging
|
||||
from .import_utils import is_torch_available, is_torch_npu_available, is_torch_version
|
||||
@@ -26,6 +27,56 @@ if is_torch_available():
|
||||
import torch
|
||||
from torch.fft import fftn, fftshift, ifftn, ifftshift
|
||||
|
||||
BACKEND_SUPPORTS_TRAINING = {"cuda": True, "xpu": True, "cpu": True, "mps": False, "default": True}
|
||||
BACKEND_EMPTY_CACHE = {
|
||||
"cuda": torch.cuda.empty_cache,
|
||||
"xpu": torch.xpu.empty_cache,
|
||||
"cpu": None,
|
||||
"mps": torch.mps.empty_cache,
|
||||
"default": None,
|
||||
}
|
||||
BACKEND_DEVICE_COUNT = {
|
||||
"cuda": torch.cuda.device_count,
|
||||
"xpu": torch.xpu.device_count,
|
||||
"cpu": lambda: 0,
|
||||
"mps": lambda: 0,
|
||||
"default": 0,
|
||||
}
|
||||
BACKEND_MANUAL_SEED = {
|
||||
"cuda": torch.cuda.manual_seed,
|
||||
"xpu": torch.xpu.manual_seed,
|
||||
"cpu": torch.manual_seed,
|
||||
"mps": torch.mps.manual_seed,
|
||||
"default": torch.manual_seed,
|
||||
}
|
||||
BACKEND_RESET_PEAK_MEMORY_STATS = {
|
||||
"cuda": torch.cuda.reset_peak_memory_stats,
|
||||
"xpu": getattr(torch.xpu, "reset_peak_memory_stats", None),
|
||||
"cpu": None,
|
||||
"mps": None,
|
||||
"default": None,
|
||||
}
|
||||
BACKEND_RESET_MAX_MEMORY_ALLOCATED = {
|
||||
"cuda": torch.cuda.reset_max_memory_allocated,
|
||||
"xpu": getattr(torch.xpu, "reset_peak_memory_stats", None),
|
||||
"cpu": None,
|
||||
"mps": None,
|
||||
"default": None,
|
||||
}
|
||||
BACKEND_MAX_MEMORY_ALLOCATED = {
|
||||
"cuda": torch.cuda.max_memory_allocated,
|
||||
"xpu": getattr(torch.xpu, "max_memory_allocated", None),
|
||||
"cpu": 0,
|
||||
"mps": 0,
|
||||
"default": 0,
|
||||
}
|
||||
BACKEND_SYNCHRONIZE = {
|
||||
"cuda": torch.cuda.synchronize,
|
||||
"xpu": getattr(torch.xpu, "synchronize", None),
|
||||
"cpu": None,
|
||||
"mps": None,
|
||||
"default": None,
|
||||
}
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
try:
|
||||
@@ -36,6 +87,62 @@ except (ImportError, ModuleNotFoundError):
|
||||
return cls
|
||||
|
||||
|
||||
# This dispatches a defined function according to the accelerator from the function definitions.
|
||||
def _device_agnostic_dispatch(device: str, dispatch_table: Dict[str, Callable], *args, **kwargs):
    """
    Look up the entry registered for `device` in `dispatch_table` and invoke it.

    Args:
        device (`str`):
            Device key, e.g. `"cuda"` or `"cpu"`. Keys missing from the table fall back to the `"default"` entry.
        dispatch_table (`Dict[str, Callable]`):
            Mapping from device key to either a callable or a plain value (such as `None` or `0`). It must
            contain a `"default"` key for the fallback to work.
        *args, **kwargs:
            Forwarded to the selected callable.

    Returns:
        The result of calling the selected entry, or the entry itself when it is not callable.

    Raises:
        KeyError: If `device` is not in the table and the table has no `"default"` entry.
    """
    # Unknown devices use the "default" entry and then go through the same
    # non-callable guard as known devices, so a `None`/`0` default is returned
    # instead of being called.
    fn = dispatch_table[device] if device in dispatch_table else dispatch_table["default"]

    # Some device agnostic functions return values. Need to guard against 'None' instead at
    # user level
    if not callable(fn):
        return fn

    return fn(*args, **kwargs)
|
||||
|
||||
|
||||
# These are callables which automatically dispatch the function specific to the accelerator
|
||||
def backend_manual_seed(device: str, seed: int):
    """Pass `seed` to the manual-seed entry registered for `device` in `BACKEND_MANUAL_SEED`."""
    dispatch_table = BACKEND_MANUAL_SEED
    return _device_agnostic_dispatch(device, dispatch_table, seed)
|
||||
|
||||
|
||||
def backend_synchronize(device: str):
    """Dispatch to the synchronize entry registered for `device` in `BACKEND_SYNCHRONIZE`."""
    dispatch_table = BACKEND_SYNCHRONIZE
    return _device_agnostic_dispatch(device, dispatch_table)
|
||||
|
||||
|
||||
def backend_empty_cache(device: str):
    """Dispatch to the empty-cache entry registered for `device` in `BACKEND_EMPTY_CACHE`."""
    dispatch_table = BACKEND_EMPTY_CACHE
    return _device_agnostic_dispatch(device, dispatch_table)
|
||||
|
||||
|
||||
def backend_device_count(device: str):
    """Return the device count reported by the entry registered for `device` in `BACKEND_DEVICE_COUNT`."""
    dispatch_table = BACKEND_DEVICE_COUNT
    return _device_agnostic_dispatch(device, dispatch_table)
|
||||
|
||||
|
||||
def backend_reset_peak_memory_stats(device: str):
    """Dispatch to the entry registered for `device` in `BACKEND_RESET_PEAK_MEMORY_STATS`."""
    dispatch_table = BACKEND_RESET_PEAK_MEMORY_STATS
    return _device_agnostic_dispatch(device, dispatch_table)
|
||||
|
||||
|
||||
def backend_reset_max_memory_allocated(device: str):
    """Dispatch to the entry registered for `device` in `BACKEND_RESET_MAX_MEMORY_ALLOCATED`."""
    dispatch_table = BACKEND_RESET_MAX_MEMORY_ALLOCATED
    return _device_agnostic_dispatch(device, dispatch_table)
|
||||
|
||||
|
||||
def backend_max_memory_allocated(device: str):
    """Return the value produced by the entry registered for `device` in `BACKEND_MAX_MEMORY_ALLOCATED`."""
    dispatch_table = BACKEND_MAX_MEMORY_ALLOCATED
    return _device_agnostic_dispatch(device, dispatch_table)
|
||||
|
||||
|
||||
# These are callables which return boolean behaviour flags and can be used to specify some
|
||||
# device agnostic alternative where the feature is unsupported.
|
||||
def backend_supports_training(device: str):
    """
    Return the training-support flag stored for `device` in `BACKEND_SUPPORTS_TRAINING`.

    Returns `False` when torch is not available. Devices without their own entry use the `"default"` flag.
    """
    if is_torch_available():
        key = device if device in BACKEND_SUPPORTS_TRAINING else "default"
        return BACKEND_SUPPORTS_TRAINING[key]

    return False
|
||||
|
||||
|
||||
def randn_tensor(
|
||||
shape: Union[Tuple, List],
|
||||
generator: Optional[Union[List["torch.Generator"], "torch.Generator"]] = None,
|
||||
@@ -197,3 +304,31 @@ def device_synchronize(device_type: Optional[str] = None):
|
||||
device_type = get_device()
|
||||
device_mod = getattr(torch, device_type, torch.cuda)
|
||||
device_mod.synchronize()
|
||||
|
||||
|
||||
def enable_full_determinism():
    """
    Switch PyTorch into deterministic mode for reproducible behavior. See
    - https://pytorch.org/docs/stable/notes/randomness.html for pytorch
    """
    # PyTorch deterministic mode may need either 'CUDA_LAUNCH_BLOCKING' or
    # 'CUBLAS_WORKSPACE_CONFIG' depending on the CUDA version, so both are
    # exported before the mode is switched on.
    for env_name, env_value in (("CUDA_LAUNCH_BLOCKING", "1"), ("CUBLAS_WORKSPACE_CONFIG", ":16:8")):
        os.environ[env_name] = env_value
    torch.use_deterministic_algorithms(True)

    # CUDNN deterministic mode, with benchmarking and TF32 matmuls turned off.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cuda.matmul.allow_tf32 = False
|
||||
|
||||
|
||||
def disable_full_determinism():
    """
    Turn PyTorch deterministic algorithms back off.

    Sets `CUDA_LAUNCH_BLOCKING` to `"0"`, blanks `CUBLAS_WORKSPACE_CONFIG` and disables
    `torch.use_deterministic_algorithms`. The cudnn and TF32 flags are not touched here.
    """
    os.environ.update({"CUDA_LAUNCH_BLOCKING": "0", "CUBLAS_WORKSPACE_CONFIG": ""})
    torch.use_deterministic_algorithms(False)
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
torch_device = get_device()
|
||||
|
||||
+2
-2
@@ -35,13 +35,13 @@ def pytest_configure(config):
|
||||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
from diffusers.utils.testing_utils import pytest_addoption_shared
|
||||
from .testing_utils import pytest_addoption_shared
|
||||
|
||||
pytest_addoption_shared(parser)
|
||||
|
||||
|
||||
def pytest_terminal_summary(terminalreporter):
|
||||
from diffusers.utils.testing_utils import pytest_terminal_summary_main
|
||||
from .testing_utils import pytest_terminal_summary_main
|
||||
|
||||
make_reports = terminalreporter.config.getoption("--make-reports")
|
||||
if make_reports:
|
||||
|
||||
@@ -24,7 +24,8 @@ from diffusers.models import ModelMixin
|
||||
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
|
||||
from diffusers.utils import get_logger
|
||||
from diffusers.utils.import_utils import compare_versions
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ..testing_utils import (
|
||||
backend_empty_cache,
|
||||
backend_max_memory_allocated,
|
||||
backend_reset_peak_memory_stats,
|
||||
|
||||
@@ -20,7 +20,8 @@ import torch
|
||||
from diffusers.hooks import HookRegistry, ModelHook
|
||||
from diffusers.training_utils import free_memory
|
||||
from diffusers.utils.logging import get_logger
|
||||
from diffusers.utils.testing_utils import CaptureLogger, torch_device
|
||||
|
||||
from ..testing_utils import CaptureLogger, torch_device
|
||||
|
||||
|
||||
logger = get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
@@ -23,7 +23,8 @@ from diffusers import (
|
||||
AuraFlowTransformer2DModel,
|
||||
FlowMatchEulerDiscreteScheduler,
|
||||
)
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ..testing_utils import (
|
||||
floats_tensor,
|
||||
is_peft_available,
|
||||
require_peft_backend,
|
||||
@@ -35,7 +36,7 @@ if is_peft_available():
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
|
||||
|
||||
@require_peft_backend
|
||||
|
||||
@@ -26,7 +26,8 @@ from diffusers import (
|
||||
CogVideoXPipeline,
|
||||
CogVideoXTransformer3DModel,
|
||||
)
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ..testing_utils import (
|
||||
floats_tensor,
|
||||
require_peft_backend,
|
||||
require_torch_accelerator,
|
||||
@@ -35,7 +36,7 @@ from diffusers.utils.testing_utils import (
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
|
||||
|
||||
@require_peft_backend
|
||||
|
||||
@@ -22,7 +22,8 @@ from parameterized import parameterized
|
||||
from transformers import AutoTokenizer, GlmModel
|
||||
|
||||
from diffusers import AutoencoderKL, CogView4Pipeline, CogView4Transformer2DModel, FlowMatchEulerDiscreteScheduler
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ..testing_utils import (
|
||||
floats_tensor,
|
||||
require_peft_backend,
|
||||
require_torch_accelerator,
|
||||
@@ -33,7 +34,7 @@ from diffusers.utils.testing_utils import (
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
|
||||
|
||||
class TokenizerWrapper:
|
||||
|
||||
@@ -28,7 +28,8 @@ from transformers import AutoTokenizer, CLIPTextModel, CLIPTokenizer, T5EncoderM
|
||||
|
||||
from diffusers import FlowMatchEulerDiscreteScheduler, FluxControlPipeline, FluxPipeline, FluxTransformer2DModel
|
||||
from diffusers.utils import load_image, logging
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ..testing_utils import (
|
||||
CaptureLogger,
|
||||
backend_empty_cache,
|
||||
floats_tensor,
|
||||
@@ -48,11 +49,11 @@ if is_peft_available():
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests, check_if_lora_correctly_set # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests, check_if_lora_correctly_set # noqa: E402
|
||||
|
||||
|
||||
@require_peft_backend
|
||||
class FluxLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
|
||||
class FluxLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
|
||||
pipeline_class = FluxPipeline
|
||||
scheduler_cls = FlowMatchEulerDiscreteScheduler()
|
||||
scheduler_kwargs = {}
|
||||
@@ -122,7 +123,7 @@ class FluxLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
|
||||
output_no_lora = self.get_base_pipeline_output(FlowMatchEulerDiscreteScheduler)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe.transformer.add_adapter(denoiser_lora_config)
|
||||
@@ -170,7 +171,7 @@ class FluxLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
|
||||
output_no_lora = self.get_base_pipeline_output(FlowMatchEulerDiscreteScheduler)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
# Modify the config to have a layer which won't be present in the second LoRA we will load.
|
||||
@@ -219,7 +220,7 @@ class FluxLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
|
||||
output_no_lora = self.get_base_pipeline_output(FlowMatchEulerDiscreteScheduler)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
# Modify the config to have a layer which won't be present in the first LoRA we will load.
|
||||
@@ -279,7 +280,7 @@ class FluxLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
|
||||
pass
|
||||
|
||||
|
||||
class FluxControlLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
|
||||
class FluxControlLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
|
||||
pipeline_class = FluxControlPipeline
|
||||
scheduler_cls = FlowMatchEulerDiscreteScheduler()
|
||||
scheduler_kwargs = {}
|
||||
@@ -330,6 +331,7 @@ class FluxControlLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
|
||||
noise = floats_tensor((batch_size, num_channels) + sizes)
|
||||
input_ids = torch.randint(1, sequence_length, size=(batch_size, sequence_length), generator=generator)
|
||||
|
||||
np.random.seed(0)
|
||||
pipeline_inputs = {
|
||||
"prompt": "A painting of a squirrel eating a burger",
|
||||
"control_image": Image.fromarray(np.random.randint(0, 255, size=(32, 32, 3), dtype="uint8")),
|
||||
@@ -355,7 +357,7 @@ class FluxControlLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
|
||||
logger = logging.get_logger("diffusers.loaders.lora_pipeline")
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
original_output = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
original_output = self.get_base_pipeline_output(FlowMatchEulerDiscreteScheduler)
|
||||
|
||||
for norm_layer in ["norm_q", "norm_k", "norm_added_q", "norm_added_k"]:
|
||||
norm_state_dict = {}
|
||||
@@ -641,7 +643,7 @@ class FluxControlLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
original_output = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
original_output = self.get_base_pipeline_output(FlowMatchEulerDiscreteScheduler)
|
||||
|
||||
out_features, in_features = pipe.transformer.x_embedder.weight.shape
|
||||
rank = 4
|
||||
|
||||
@@ -26,7 +26,8 @@ from diffusers import (
|
||||
HunyuanVideoPipeline,
|
||||
HunyuanVideoTransformer3DModel,
|
||||
)
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ..testing_utils import (
|
||||
Expectations,
|
||||
backend_empty_cache,
|
||||
floats_tensor,
|
||||
@@ -42,7 +43,7 @@ from diffusers.utils.testing_utils import (
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
|
||||
|
||||
@require_peft_backend
|
||||
|
||||
@@ -24,12 +24,13 @@ from diffusers import (
|
||||
LTXPipeline,
|
||||
LTXVideoTransformer3DModel,
|
||||
)
|
||||
from diffusers.utils.testing_utils import floats_tensor, require_peft_backend
|
||||
|
||||
from ..testing_utils import floats_tensor, require_peft_backend
|
||||
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
|
||||
|
||||
@require_peft_backend
|
||||
|
||||
@@ -26,12 +26,13 @@ from diffusers import (
|
||||
Lumina2Pipeline,
|
||||
Lumina2Transformer2DModel,
|
||||
)
|
||||
from diffusers.utils.testing_utils import floats_tensor, is_torch_version, require_peft_backend, skip_mps, torch_device
|
||||
|
||||
from ..testing_utils import floats_tensor, is_torch_version, require_peft_backend, skip_mps, torch_device
|
||||
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests, check_if_lora_correctly_set # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests, check_if_lora_correctly_set # noqa: E402
|
||||
|
||||
|
||||
@require_peft_backend
|
||||
|
||||
@@ -19,7 +19,8 @@ import torch
|
||||
from transformers import AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLMochi, FlowMatchEulerDiscreteScheduler, MochiPipeline, MochiTransformer3DModel
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ..testing_utils import (
|
||||
floats_tensor,
|
||||
require_peft_backend,
|
||||
skip_mps,
|
||||
@@ -28,7 +29,7 @@ from diffusers.utils.testing_utils import (
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
|
||||
|
||||
@require_peft_backend
|
||||
|
||||
@@ -24,12 +24,13 @@ from diffusers import (
|
||||
QwenImagePipeline,
|
||||
QwenImageTransformer2DModel,
|
||||
)
|
||||
from diffusers.utils.testing_utils import floats_tensor, require_peft_backend
|
||||
|
||||
from ..testing_utils import floats_tensor, require_peft_backend
|
||||
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
|
||||
|
||||
@require_peft_backend
|
||||
|
||||
@@ -19,16 +19,17 @@ import torch
|
||||
from transformers import Gemma2Model, GemmaTokenizer
|
||||
|
||||
from diffusers import AutoencoderDC, FlowMatchEulerDiscreteScheduler, SanaPipeline, SanaTransformer2DModel
|
||||
from diffusers.utils.testing_utils import floats_tensor, require_peft_backend
|
||||
|
||||
from ..testing_utils import floats_tensor, require_peft_backend
|
||||
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
|
||||
|
||||
@require_peft_backend
|
||||
class SanaLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
|
||||
class SanaLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
|
||||
pipeline_class = SanaPipeline
|
||||
scheduler_cls = FlowMatchEulerDiscreteScheduler(shift=7.0)
|
||||
scheduler_kwargs = {}
|
||||
|
||||
@@ -32,7 +32,8 @@ from diffusers import (
|
||||
StableDiffusionPipeline,
|
||||
)
|
||||
from diffusers.utils.import_utils import is_accelerate_available
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ..testing_utils import (
|
||||
Expectations,
|
||||
backend_empty_cache,
|
||||
load_image,
|
||||
@@ -47,7 +48,7 @@ from diffusers.utils.testing_utils import (
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests, check_if_lora_correctly_set # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests, check_if_lora_correctly_set # noqa: E402
|
||||
|
||||
|
||||
if is_accelerate_available():
|
||||
|
||||
@@ -28,7 +28,8 @@ from diffusers import (
|
||||
)
|
||||
from diffusers.utils import load_image
|
||||
from diffusers.utils.import_utils import is_accelerate_available
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ..testing_utils import (
|
||||
backend_empty_cache,
|
||||
is_flaky,
|
||||
nightly,
|
||||
@@ -42,7 +43,7 @@ from diffusers.utils.testing_utils import (
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
|
||||
|
||||
if is_accelerate_available():
|
||||
|
||||
@@ -35,7 +35,8 @@ from diffusers import (
|
||||
)
|
||||
from diffusers.utils import logging
|
||||
from diffusers.utils.import_utils import is_accelerate_available
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ..testing_utils import (
|
||||
CaptureLogger,
|
||||
backend_empty_cache,
|
||||
is_flaky,
|
||||
@@ -51,7 +52,7 @@ from diffusers.utils.testing_utils import (
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests, check_if_lora_correctly_set, state_dicts_almost_equal # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests, check_if_lora_correctly_set, state_dicts_almost_equal # noqa: E402
|
||||
|
||||
|
||||
if is_accelerate_available():
|
||||
|
||||
@@ -24,7 +24,8 @@ from diffusers import (
|
||||
WanPipeline,
|
||||
WanTransformer3DModel,
|
||||
)
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ..testing_utils import (
|
||||
floats_tensor,
|
||||
require_peft_backend,
|
||||
skip_mps,
|
||||
@@ -33,7 +34,7 @@ from diffusers.utils.testing_utils import (
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
|
||||
|
||||
@require_peft_backend
|
||||
|
||||
@@ -25,7 +25,8 @@ from transformers import AutoTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKLWan, FlowMatchEulerDiscreteScheduler, WanVACEPipeline, WanVACETransformer3DModel
|
||||
from diffusers.utils.import_utils import is_peft_available
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ..testing_utils import (
|
||||
floats_tensor,
|
||||
is_flaky,
|
||||
require_peft_backend,
|
||||
@@ -40,7 +41,7 @@ if is_peft_available():
|
||||
|
||||
sys.path.append(".")
|
||||
|
||||
from utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
from .utils import PeftLoraLoaderMixinTests # noqa: E402
|
||||
|
||||
|
||||
@require_peft_backend
|
||||
|
||||
+67
-32
@@ -32,7 +32,8 @@ from diffusers import (
|
||||
)
|
||||
from diffusers.utils import logging
|
||||
from diffusers.utils.import_utils import is_peft_available
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ..testing_utils import (
|
||||
CaptureLogger,
|
||||
check_if_dicts_are_equal,
|
||||
floats_tensor,
|
||||
@@ -128,6 +129,46 @@ class PeftLoraLoaderMixinTests:
|
||||
text_encoder_target_modules = ["q_proj", "k_proj", "v_proj", "out_proj"]
|
||||
denoiser_target_modules = ["to_q", "to_k", "to_v", "to_out.0"]
|
||||
|
||||
def setUp(self):
    """Populate the base-pipeline output cache, then continue with the next `setUp` in the MRO."""
    # The cache is filled first so tests can read it through `get_base_pipeline_output`.
    self._cache_base_pipeline_output()
    super().setUp()
|
||||
|
||||
def _cache_base_pipeline_output(self):
    """
    Run the pipeline without LoRA once per scheduler class and store the output on the test class.

    The cache is a dict kept in the `cached_base_pipe_outs` attribute of `type(self)`, keyed by scheduler
    class name. Schedulers that already have an entry are not run again.
    """
    test_cls = type(self)

    # The cache lives on the class (not the instance) so it outlives a single test.
    if not hasattr(test_cls, "cached_base_pipe_outs"):
        test_cls.cached_base_pipe_outs = {}
    outputs_by_scheduler = test_cls.cached_base_pipe_outs

    # Nothing to do when every scheduler already has a cached output.
    if outputs_by_scheduler and all(cls.__name__ in outputs_by_scheduler for cls in self.scheduler_classes):
        return

    for scheduler_cls in self.scheduler_classes:
        scheduler_name = scheduler_cls.__name__
        if scheduler_name not in outputs_by_scheduler:
            components, _, _ = self.get_dummy_components(scheduler_cls)
            pipe = self.pipeline_class(**components)
            pipe = pipe.to(torch_device)
            pipe.set_progress_bar_config(disable=None)

            # Inputs are requested without a `generator`; a freshly seeded one is passed
            # explicitly to the pipeline call instead.
            _, _, inputs = self.get_dummy_inputs(with_generator=False)
            outputs_by_scheduler[scheduler_name] = pipe(**inputs, generator=torch.manual_seed(0))[0]

    # Re-bind the (mutated) dict on the class.
    test_cls.cached_base_pipe_outs = outputs_by_scheduler
|
||||
|
||||
def get_base_pipeline_output(self, scheduler_cls):
    """
    Look up the cached base pipeline output for `scheduler_cls`.

    The lookup key is `scheduler_cls.__name__`; the entry is expected to have been stored in
    `cached_base_pipe_outs` beforehand (the cache is populated during setUp). A missing entry raises
    `KeyError`.
    """
    scheduler_name = scheduler_cls.__name__
    cached_outputs = self.cached_base_pipe_outs
    return cached_outputs[scheduler_name]
|
||||
|
||||
def get_dummy_components(self, scheduler_cls=None, use_dora=False, lora_alpha=None):
|
||||
if self.unet_kwargs and self.transformer_kwargs:
|
||||
raise ValueError("Both `unet_kwargs` and `transformer_kwargs` cannot be specified.")
|
||||
@@ -319,13 +360,7 @@ class PeftLoraLoaderMixinTests:
|
||||
Tests a simple inference and makes sure it works as expected
|
||||
"""
|
||||
for scheduler_cls in self.scheduler_classes:
|
||||
components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
|
||||
pipe = self.pipeline_class(**components)
|
||||
pipe = pipe.to(torch_device)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
_, _, inputs = self.get_dummy_inputs()
|
||||
output_no_lora = pipe(**inputs)[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
def test_simple_inference_with_text_lora(self):
|
||||
@@ -340,7 +375,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
|
||||
@@ -423,7 +458,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)
|
||||
@@ -479,7 +514,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
|
||||
@@ -517,7 +552,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
|
||||
@@ -549,7 +584,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
|
||||
@@ -584,7 +619,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
|
||||
@@ -635,7 +670,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
|
||||
@@ -686,7 +721,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
|
||||
@@ -729,7 +764,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)
|
||||
@@ -770,7 +805,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)
|
||||
@@ -814,7 +849,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe, denoiser = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)
|
||||
@@ -852,7 +887,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe, denoiser = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)
|
||||
@@ -931,7 +966,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
|
||||
if "text_encoder" in self.pipeline_class._lora_loadable_modules:
|
||||
pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
|
||||
@@ -1060,7 +1095,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
|
||||
pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
|
||||
self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
|
||||
@@ -1117,7 +1152,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
|
||||
if "text_encoder" in self.pipeline_class._lora_loadable_modules:
|
||||
pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
|
||||
@@ -1280,7 +1315,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
|
||||
if "text_encoder" in self.pipeline_class._lora_loadable_modules:
|
||||
pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
|
||||
@@ -1374,7 +1409,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
|
||||
if "text_encoder" in self.pipeline_class._lora_loadable_modules:
|
||||
pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
|
||||
@@ -1618,7 +1653,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
if "text_encoder" in self.pipeline_class._lora_loadable_modules:
|
||||
@@ -1699,7 +1734,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
if "text_encoder" in self.pipeline_class._lora_loadable_modules:
|
||||
@@ -1754,7 +1789,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_dora_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_dora_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_dora_lora.shape == self.output_shape)
|
||||
|
||||
pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)
|
||||
@@ -1886,7 +1921,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
original_out = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
original_out = self.get_base_pipeline_output(scheduler_cls)
|
||||
|
||||
no_op_state_dict = {"lora_foo": torch.tensor(2.0), "lora_bar": torch.tensor(3.0)}
|
||||
logger = logging.get_logger("diffusers.loaders.peft")
|
||||
@@ -1932,7 +1967,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)
|
||||
@@ -2286,7 +2321,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe = self.pipeline_class(**components).to(torch_device)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
self.assertTrue(output_no_lora.shape == self.output_shape)
|
||||
|
||||
pipe, _ = self.add_adapters_to_pipeline(
|
||||
@@ -2336,7 +2371,7 @@ class PeftLoraLoaderMixinTests:
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
_, _, inputs = self.get_dummy_inputs(with_generator=False)
|
||||
|
||||
output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
|
||||
output_no_lora = self.get_base_pipeline_output(scheduler_cls)
|
||||
|
||||
if "text_encoder" in self.pipeline_class._lora_loadable_modules:
|
||||
pipe.text_encoder.add_adapter(text_lora_config)
|
||||
|
||||
@@ -21,7 +21,8 @@ from parameterized import parameterized
|
||||
|
||||
from diffusers import AsymmetricAutoencoderKL
|
||||
from diffusers.utils.import_utils import is_xformers_available
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ...testing_utils import (
|
||||
Expectations,
|
||||
backend_empty_cache,
|
||||
enable_full_determinism,
|
||||
@@ -34,7 +35,6 @@ from diffusers.utils.testing_utils import (
|
||||
torch_all_close,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -15,8 +15,8 @@
|
||||
import unittest
|
||||
|
||||
from diffusers import AutoencoderKLCosmos
|
||||
from diffusers.utils.testing_utils import enable_full_determinism, floats_tensor, torch_device
|
||||
|
||||
from ...testing_utils import enable_full_determinism, floats_tensor, torch_device
|
||||
from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -16,12 +16,12 @@
|
||||
import unittest
|
||||
|
||||
from diffusers import AutoencoderDC
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ...testing_utils import (
|
||||
enable_full_determinism,
|
||||
floats_tensor,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -19,12 +19,12 @@ import torch
|
||||
|
||||
from diffusers import AutoencoderKLHunyuanVideo
|
||||
from diffusers.models.autoencoders.autoencoder_kl_hunyuan_video import prepare_causal_attention_mask
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ...testing_utils import (
|
||||
enable_full_determinism,
|
||||
floats_tensor,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -21,7 +21,8 @@ from parameterized import parameterized
|
||||
|
||||
from diffusers import AutoencoderKL
|
||||
from diffusers.utils.import_utils import is_xformers_available
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ...testing_utils import (
|
||||
backend_empty_cache,
|
||||
enable_full_determinism,
|
||||
floats_tensor,
|
||||
@@ -34,7 +35,6 @@ from diffusers.utils.testing_utils import (
|
||||
torch_all_close,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -18,12 +18,12 @@ import unittest
|
||||
import torch
|
||||
|
||||
from diffusers import AutoencoderKLCogVideoX
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ...testing_utils import (
|
||||
enable_full_determinism,
|
||||
floats_tensor,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -16,12 +16,12 @@
|
||||
import unittest
|
||||
|
||||
from diffusers import AutoencoderKLTemporalDecoder
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ...testing_utils import (
|
||||
enable_full_determinism,
|
||||
floats_tensor,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -18,12 +18,12 @@ import unittest
|
||||
import torch
|
||||
|
||||
from diffusers import AutoencoderKLLTXVideo
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ...testing_utils import (
|
||||
enable_full_determinism,
|
||||
floats_tensor,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
import unittest
|
||||
|
||||
from diffusers import AutoencoderKLMagvit
|
||||
from diffusers.utils.testing_utils import enable_full_determinism, floats_tensor, torch_device
|
||||
|
||||
from ...testing_utils import enable_full_determinism, floats_tensor, torch_device
|
||||
from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -16,12 +16,12 @@
|
||||
import unittest
|
||||
|
||||
from diffusers import AutoencoderKLMochi
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ...testing_utils import (
|
||||
enable_full_determinism,
|
||||
floats_tensor,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -21,7 +21,8 @@ from datasets import load_dataset
|
||||
from parameterized import parameterized
|
||||
|
||||
from diffusers import AutoencoderOobleck
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ...testing_utils import (
|
||||
backend_empty_cache,
|
||||
enable_full_determinism,
|
||||
floats_tensor,
|
||||
@@ -29,7 +30,6 @@ from diffusers.utils.testing_utils import (
|
||||
torch_all_close,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -21,7 +21,8 @@ import torch
|
||||
from parameterized import parameterized
|
||||
|
||||
from diffusers import AutoencoderTiny
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ...testing_utils import (
|
||||
backend_empty_cache,
|
||||
enable_full_determinism,
|
||||
floats_tensor,
|
||||
@@ -30,7 +31,6 @@ from diffusers.utils.testing_utils import (
|
||||
torch_all_close,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -18,8 +18,8 @@ import unittest
|
||||
import torch
|
||||
|
||||
from diffusers import AutoencoderKLWan
|
||||
from diffusers.utils.testing_utils import enable_full_determinism, floats_tensor, torch_device
|
||||
|
||||
from ...testing_utils import enable_full_determinism, floats_tensor, torch_device
|
||||
from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -20,7 +20,9 @@ import numpy as np
|
||||
import torch
|
||||
|
||||
from diffusers import ConsistencyDecoderVAE, StableDiffusionPipeline
|
||||
from diffusers.utils.testing_utils import (
|
||||
from diffusers.utils.torch_utils import randn_tensor
|
||||
|
||||
from ...testing_utils import (
|
||||
backend_empty_cache,
|
||||
enable_full_determinism,
|
||||
load_image,
|
||||
@@ -28,8 +30,6 @@ from diffusers.utils.testing_utils import (
|
||||
torch_all_close,
|
||||
torch_device,
|
||||
)
|
||||
from diffusers.utils.torch_utils import randn_tensor
|
||||
|
||||
from ..test_modeling_common import ModelTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -18,13 +18,13 @@ import unittest
|
||||
import torch
|
||||
|
||||
from diffusers import VQModel
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ...testing_utils import (
|
||||
backend_manual_seed,
|
||||
enable_full_determinism,
|
||||
floats_tensor,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
|
||||
|
||||
|
||||
|
||||
@@ -7,7 +7,8 @@ import torch
|
||||
|
||||
from diffusers import DiffusionPipeline
|
||||
from diffusers.models.attention_processor import Attention, AttnAddedKVProcessor
|
||||
from diffusers.utils.testing_utils import torch_device
|
||||
|
||||
from ..testing_utils import torch_device
|
||||
|
||||
|
||||
class AttnAddedKVProcessorTests(unittest.TestCase):
|
||||
|
||||
@@ -24,7 +24,8 @@ from diffusers.models.attention import GEGLU, AdaLayerNorm, ApproximateGELU
|
||||
from diffusers.models.embeddings import get_timestep_embedding
|
||||
from diffusers.models.resnet import Downsample2D, ResnetBlock2D, Upsample2D
|
||||
from diffusers.models.transformers.transformer_2d import Transformer2DModel
|
||||
from diffusers.utils.testing_utils import (
|
||||
|
||||
from ..testing_utils import (
|
||||
backend_manual_seed,
|
||||
require_torch_accelerator_with_fp64,
|
||||
require_torch_version_greater_equal,
|
||||
|
||||
@@ -59,7 +59,10 @@ from diffusers.utils import (
|
||||
logging,
|
||||
)
|
||||
from diffusers.utils.hub_utils import _add_variant
|
||||
from diffusers.utils.testing_utils import (
|
||||
from diffusers.utils.torch_utils import get_torch_cuda_device_capability
|
||||
|
||||
from ..others.test_utils import TOKEN, USER, is_staging_test
|
||||
from ..testing_utils import (
|
||||
CaptureLogger,
|
||||
_check_safetensors_serialization,
|
||||
backend_empty_cache,
|
||||
@@ -82,9 +85,6 @@ from diffusers.utils.testing_utils import (
|
||||
torch_all_close,
|
||||
torch_device,
|
||||
)
|
||||
from diffusers.utils.torch_utils import get_torch_cuda_device_capability
|
||||
|
||||
from ..others.test_utils import TOKEN, USER, is_staging_test
|
||||
|
||||
|
||||
if is_peft_available():
|
||||
@@ -2059,6 +2059,7 @@ class TorchCompileTesterMixin:
|
||||
init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
|
||||
|
||||
model = self.model_class(**init_dict).to(torch_device)
|
||||
model.eval()
|
||||
model = torch.compile(model, fullgraph=True)
|
||||
|
||||
with (
|
||||
@@ -2076,6 +2077,7 @@ class TorchCompileTesterMixin:
|
||||
init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
|
||||
|
||||
model = self.model_class(**init_dict).to(torch_device)
|
||||
model.eval()
|
||||
model.compile_repeated_blocks(fullgraph=True)
|
||||
|
||||
recompile_limit = 1
|
||||
@@ -2098,7 +2100,6 @@ class TorchCompileTesterMixin:
|
||||
|
||||
init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
|
||||
model = self.model_class(**init_dict)
|
||||
|
||||
model.eval()
|
||||
# TODO: Can test for other group offloading kwargs later if needed.
|
||||
group_offload_kwargs = {
|
||||
@@ -2111,11 +2112,11 @@ class TorchCompileTesterMixin:
|
||||
}
|
||||
model.enable_group_offload(**group_offload_kwargs)
|
||||
model.compile()
|
||||
|
||||
with torch.no_grad():
|
||||
_ = model(**inputs_dict)
|
||||
_ = model(**inputs_dict)
|
||||
|
||||
@require_torch_version_greater("2.7.1")
|
||||
def test_compile_on_different_shapes(self):
|
||||
if self.different_shapes_for_compilation is None:
|
||||
pytest.skip(f"Skipping as `different_shapes_for_compilation` is not set for {self.__class__.__name__}.")
|
||||
@@ -2123,6 +2124,7 @@ class TorchCompileTesterMixin:
|
||||
|
||||
init_dict, _ = self.prepare_init_args_and_inputs_for_common()
|
||||
model = self.model_class(**init_dict).to(torch_device)
|
||||
model.eval()
|
||||
model = torch.compile(model, fullgraph=True, dynamic=True)
|
||||
|
||||
for height, width in self.different_shapes_for_compilation:
|
||||
@@ -2130,6 +2132,26 @@ class TorchCompileTesterMixin:
|
||||
inputs_dict = self.prepare_dummy_input(height=height, width=width)
|
||||
_ = model(**inputs_dict)
|
||||
|
||||
def test_compile_works_with_aot(self):
|
||||
from torch._inductor.package import load_package
|
||||
|
||||
init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
|
||||
|
||||
model = self.model_class(**init_dict).to(torch_device)
|
||||
exported_model = torch.export.export(model, args=(), kwargs=inputs_dict)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
package_path = os.path.join(tmpdir, f"{self.model_class.__name__}.pt2")
|
||||
_ = torch._inductor.aoti_compile_and_package(exported_model, package_path=package_path)
|
||||
assert os.path.exists(package_path)
|
||||
loaded_binary = load_package(package_path, run_single_threaded=True)
|
||||
|
||||
model.forward = loaded_binary
|
||||
|
||||
with torch.no_grad():
|
||||
_ = model(**inputs_dict)
|
||||
_ = model(**inputs_dict)
|
||||
|
||||
|
||||
@slow
|
||||
@require_torch_2
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user