Compare commits

..

9 Commits

Author SHA1 Message Date
github-actions[bot] 7e57a5ae36 Apply style fixes 2025-04-09 15:04:16 +00:00
sayakpaul 13cbae47e7 push changes to mess up styling. 2025-04-09 20:32:12 +05:30
Guillaume LEGENDRE 5bb76a4d90 test new token for STYLE BOT 2025-04-09 16:26:03 +02:00
Sayak Paul 5b27f8aba8 fix consisid imports (#11254)
* fix consisid imports

* fix opencv import

* fix
2025-04-09 18:49:32 +05:30
Sayak Paul d1387ecee5 fix timeout constant (#11252)
* fix timeout constant

* style

* fix
2025-04-09 17:48:52 +05:30
Ilya Drobyshevskiy 6a7c2d0afa fix flux controlnet bug (#11152)
Before this if txt_ids was 3d tensor, line with txt_ids[:1] concat txt_ids by batch dim. Now we first check that txt_ids is 2d tensor (or take first batch element) and then concat by token dim
2025-04-09 13:01:07 +01:00
Dhruv Nair edc154da09 Update Ruff to latest Version (#10919)
* update

* update

* update

* update
2025-04-09 16:51:34 +05:30
hlky 552cd32058 [docs] AutoModel (#11250)
Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
2025-04-09 16:42:23 +05:30
Yao Matrix c36c745ceb fix FluxReduxSlowTests::test_flux_redux_inference case failure on XPU (#11245)
* loose test_float16_inference's tolerance from 5e-2 to 6e-2, so XPU can
pass UT

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* fix test_pipeline_flux_redux fail on XPU

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

---------

Signed-off-by: Matrix Yao <matrix.yao@intel.com>
2025-04-09 11:41:15 +01:00
17 changed files with 195 additions and 73 deletions
+1 -1
View File
@@ -14,4 +14,4 @@ jobs:
with:
python_quality_dependencies: "[quality]"
secrets:
bot_token: ${{ secrets.GITHUB_TOKEN }}
bot_token: ${{ secrets.DIFFUSERS_STYLE_BOT_ACTION_TOKEN }}
+2
View File
@@ -265,6 +265,8 @@
sections:
- local: api/models/overview
title: Overview
- local: api/models/auto_model
title: AutoModel
- sections:
- local: api/models/controlnet
title: ControlNetModel
+29
View File
@@ -0,0 +1,29 @@
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# AutoModel
The `AutoModel` is designed to make it easy to load a checkpoint without needing to know the specific model class. `AutoModel` automatically retrieves the correct model class from the checkpoint `config.json` file.
```python
from diffusers import AutoModel, AutoPipelineForText2Image
unet = AutoModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet")
pipe = AutoPipelineForText2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", unet=unet)
```
## AutoModel
[[autodoc]] AutoModel
- all
- from_pretrained
+1
View File
@@ -142,6 +142,7 @@ _deps = [
"urllib3<=2.0.0",
"black",
"phonemizer",
"opencv-python",
]
# this is a lookup table with items like:
+23 -2
View File
@@ -14,6 +14,7 @@ from .utils import (
is_librosa_available,
is_note_seq_available,
is_onnx_available,
is_opencv_available,
is_optimum_quanto_available,
is_scipy_available,
is_sentencepiece_available,
@@ -352,7 +353,6 @@ else:
"CogView3PlusPipeline",
"CogView4ControlPipeline",
"CogView4Pipeline",
"ConsisIDPipeline",
"CycleDiffusionPipeline",
"EasyAnimateControlPipeline",
"EasyAnimateInpaintPipeline",
@@ -518,6 +518,19 @@ else:
]
)
try:
if not (is_torch_available() and is_transformers_available() and is_opencv_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from .utils import dummy_torch_and_transformers_and_opencv_objects # noqa F403
_import_structure["utils.dummy_torch_and_transformers_and_opencv_objects"] = [
name for name in dir(dummy_torch_and_transformers_and_opencv_objects) if not name.startswith("_")
]
else:
_import_structure["pipelines"].extend(["ConsisIDPipeline"])
try:
if not (is_torch_available() and is_transformers_available() and is_k_diffusion_available()):
raise OptionalDependencyNotAvailable()
@@ -909,7 +922,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
CogView3PlusPipeline,
CogView4ControlPipeline,
CogView4Pipeline,
ConsisIDPipeline,
CycleDiffusionPipeline,
EasyAnimateControlPipeline,
EasyAnimateInpaintPipeline,
@@ -1088,6 +1100,15 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
from .utils.dummy_torch_and_transformers_and_sentencepiece_objects import * # noqa F403
else:
from .pipelines import KolorsImg2ImgPipeline, KolorsPAGPipeline, KolorsPipeline
try:
if not (is_torch_available() and is_transformers_available() and is_opencv_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from .utils.dummy_torch_and_transformers_and_opencv_objects import * # noqa F403
else:
from .pipelines import ConsisIDPipeline
try:
if not (is_torch_available() and is_transformers_available() and is_onnx_available()):
raise OptionalDependencyNotAvailable()
@@ -49,4 +49,5 @@ deps = {
"urllib3": "urllib3<=2.0.0",
"black": "black",
"phonemizer": "phonemizer",
"opencv-python": "opencv-python",
}
@@ -298,15 +298,6 @@ class FluxControlNetModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
)
encoder_hidden_states = self.context_embedder(encoder_hidden_states)
if self.union:
# union mode
if controlnet_mode is None:
raise ValueError("`controlnet_mode` cannot be `None` when applying ControlNet-Union")
# union mode emb
controlnet_mode_emb = self.controlnet_mode_embedder(controlnet_mode)
encoder_hidden_states = torch.cat([controlnet_mode_emb, encoder_hidden_states], dim=1)
txt_ids = torch.cat([txt_ids[:1], txt_ids], dim=0)
if txt_ids.ndim == 3:
logger.warning(
"Passing `txt_ids` 3d torch.Tensor is deprecated."
@@ -320,6 +311,15 @@ class FluxControlNetModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
)
img_ids = img_ids[0]
if self.union:
# union mode
if controlnet_mode is None:
raise ValueError("`controlnet_mode` cannot be `None` when applying ControlNet-Union")
# union mode emb
controlnet_mode_emb = self.controlnet_mode_embedder(controlnet_mode)
encoder_hidden_states = torch.cat([controlnet_mode_emb, encoder_hidden_states], dim=1)
txt_ids = torch.cat([txt_ids[:1], txt_ids], dim=0)
ids = torch.cat((txt_ids, img_ids), dim=0)
image_rotary_emb = self.pos_embed(ids)
+21 -2
View File
@@ -10,6 +10,7 @@ from ..utils import (
is_librosa_available,
is_note_seq_available,
is_onnx_available,
is_opencv_available,
is_sentencepiece_available,
is_torch_available,
is_torch_npu_available,
@@ -155,7 +156,6 @@ else:
]
_import_structure["cogview3"] = ["CogView3PlusPipeline"]
_import_structure["cogview4"] = ["CogView4Pipeline", "CogView4ControlPipeline"]
_import_structure["consisid"] = ["ConsisIDPipeline"]
_import_structure["controlnet"].extend(
[
"BlipDiffusionControlNetPipeline",
@@ -414,6 +414,18 @@ else:
"KolorsImg2ImgPipeline",
]
try:
if not (is_torch_available() and is_transformers_available() and is_opencv_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from ..utils import (
dummy_torch_and_transformers_and_opencv_objects,
)
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_opencv_objects))
else:
_import_structure["consisid"] = ["ConsisIDPipeline"]
try:
if not is_flax_available():
raise OptionalDependencyNotAvailable()
@@ -512,7 +524,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
)
from .cogview3 import CogView3PlusPipeline
from .cogview4 import CogView4ControlPipeline, CogView4Pipeline
from .consisid import ConsisIDPipeline
from .controlnet import (
BlipDiffusionControlNetPipeline,
StableDiffusionControlNetImg2ImgPipeline,
@@ -761,6 +772,14 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
KolorsPipeline,
)
try:
if not (is_torch_available() and is_transformers_available() and is_opencv_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from ..utils.dummy_torch_and_transformers_and_opencv_objects import *
else:
from .consisid import ConsisIDPipeline
try:
if not is_flax_available():
raise OptionalDependencyNotAvailable()
+4 -3
View File
@@ -5,6 +5,7 @@ from ...utils import (
OptionalDependencyNotAvailable,
_LazyModule,
get_objects_from_module,
is_opencv_available,
is_torch_available,
is_transformers_available,
)
@@ -15,12 +16,12 @@ _import_structure = {}
try:
if not (is_transformers_available() and is_torch_available()):
if not (is_transformers_available() and is_torch_available() and is_opencv_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from ...utils import dummy_torch_and_transformers_objects # noqa F403
from ...utils import dummy_torch_and_transformers_and_opencv_objects # noqa F403
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_opencv_objects))
else:
_import_structure["pipeline_consisid"] = ["ConsisIDPipeline"]
@@ -16,7 +16,6 @@ import inspect
import math
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import cv2
import numpy as np
import PIL
import torch
@@ -29,12 +28,16 @@ from ...models import AutoencoderKLCogVideoX, ConsisIDTransformer3DModel
from ...models.embeddings import get_3d_rotary_pos_embed
from ...pipelines.pipeline_utils import DiffusionPipeline
from ...schedulers import CogVideoXDPMScheduler
from ...utils import logging, replace_example_docstring
from ...utils import is_opencv_available, logging, replace_example_docstring
from ...utils.torch_utils import randn_tensor
from ...video_processor import VideoProcessor
from .pipeline_output import ConsisIDPipelineOutput
if is_opencv_available():
import cv2
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
+1
View File
@@ -79,6 +79,7 @@ from .import_utils import (
is_matplotlib_available,
is_note_seq_available,
is_onnx_available,
is_opencv_available,
is_optimum_quanto_available,
is_optimum_quanto_version,
is_peft_available,
@@ -0,0 +1,17 @@
# This file is autogenerated by the command `make fix-copies`, do not edit.
from ..utils import DummyObject, requires_backends
class ConsisIDPipeline(metaclass=DummyObject):
_backends = ["torch", "transformers", "opencv"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch", "transformers", "opencv"])
@classmethod
def from_config(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers", "opencv"])
@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers", "opencv"])
@@ -392,21 +392,6 @@ class CogView4Pipeline(metaclass=DummyObject):
requires_backends(cls, ["torch", "transformers"])
class ConsisIDPipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch", "transformers"])
@classmethod
def from_config(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])
@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])
class CycleDiffusionPipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]
+2
View File
@@ -37,6 +37,8 @@ class DependencyTester(unittest.TestCase):
backend = "k-diffusion"
elif backend == "invisible_watermark":
backend = "invisible-watermark"
elif backend == "opencv":
backend = "opencv-python"
assert backend in deps, f"{backend} is not in the deps table!"
def test_pipeline_imports(self):
@@ -8,6 +8,7 @@ import torch
from diffusers import FluxPipeline, FluxPriorReduxPipeline
from diffusers.utils import load_image
from diffusers.utils.testing_utils import (
Expectations,
backend_empty_cache,
numpy_cosine_similarity_distance,
require_big_accelerator,
@@ -21,7 +22,7 @@ from diffusers.utils.testing_utils import (
@pytest.mark.big_gpu_with_torch_cuda
class FluxReduxSlowTests(unittest.TestCase):
pipeline_class = FluxPriorReduxPipeline
repo_id = "YiYiXu/yiyi-redux" # update to "black-forest-labs/FLUX.1-Redux-dev" once PR is merged
repo_id = "black-forest-labs/FLUX.1-Redux-dev"
base_pipeline_class = FluxPipeline
base_repo_id = "black-forest-labs/FLUX.1-schnell"
@@ -69,41 +70,82 @@ class FluxReduxSlowTests(unittest.TestCase):
image = pipe_base(**base_pipeline_inputs, **redux_pipeline_output).images[0]
image_slice = image[0, :10, :10]
expected_slice = np.array(
[
0.30078125,
0.37890625,
0.46875,
0.28125,
0.36914062,
0.47851562,
0.28515625,
0.375,
0.4765625,
0.28125,
0.375,
0.48046875,
0.27929688,
0.37695312,
0.47851562,
0.27734375,
0.38085938,
0.4765625,
0.2734375,
0.38085938,
0.47265625,
0.27539062,
0.37890625,
0.47265625,
0.27734375,
0.37695312,
0.47070312,
0.27929688,
0.37890625,
0.47460938,
],
dtype=np.float32,
expected_slices = Expectations(
{
("cuda", 7): np.array(
[
0.30078125,
0.37890625,
0.46875,
0.28125,
0.36914062,
0.47851562,
0.28515625,
0.375,
0.4765625,
0.28125,
0.375,
0.48046875,
0.27929688,
0.37695312,
0.47851562,
0.27734375,
0.38085938,
0.4765625,
0.2734375,
0.38085938,
0.47265625,
0.27539062,
0.37890625,
0.47265625,
0.27734375,
0.37695312,
0.47070312,
0.27929688,
0.37890625,
0.47460938,
],
dtype=np.float32,
),
("xpu", 3): np.array(
[
0.20507812,
0.30859375,
0.3984375,
0.18554688,
0.30078125,
0.41015625,
0.19921875,
0.3125,
0.40625,
0.19726562,
0.3125,
0.41601562,
0.19335938,
0.31445312,
0.4140625,
0.1953125,
0.3203125,
0.41796875,
0.19726562,
0.32421875,
0.41992188,
0.19726562,
0.32421875,
0.41992188,
0.20117188,
0.32421875,
0.41796875,
0.203125,
0.32617188,
0.41796875,
],
dtype=np.float32,
),
}
)
expected_slice = expected_slices.get_expectation()
max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten())
assert max_diff < 1e-4
+1 -1
View File
@@ -1347,7 +1347,7 @@ class PipelineTesterMixin:
@unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU")
@require_accelerator
def test_float16_inference(self, expected_max_diff=5e-2):
def test_float16_inference(self, expected_max_diff=6e-2):
components = self.get_dummy_components()
pipe = self.pipeline_class(**components)
for component in pipe.components.values():
+1 -3
View File
@@ -17,8 +17,6 @@
import requests
from packaging.version import parse
from ..src.diffusers.utils.constants import DIFFUSERS_REQUEST_TIMEOUT
# GitHub repository details
USER = "huggingface"
@@ -33,7 +31,7 @@ def fetch_all_branches(user, repo):
response = requests.get(
f"https://api.github.com/repos/{user}/{repo}/branches",
params={"page": page},
timeout=DIFFUSERS_REQUEST_TIMEOUT,
timeout=60,
)
# Check if the request was successful