Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b5ba24b1dc |
@@ -17,6 +17,9 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi
|
||||
- [`StableDiffusionLoraLoaderMixin`] provides functions for loading and unloading, fusing and unfusing, enabling and disabling, and otherwise managing LoRA weights. This class can be used with any model.
|
||||
- [`StableDiffusionXLLoraLoaderMixin`] is a [Stable Diffusion XL (SDXL)](../../api/pipelines/stable_diffusion/stable_diffusion_xl) version of the [`StableDiffusionLoraLoaderMixin`] class for loading and saving LoRA weights. It can only be used with the SDXL model.
|
||||
- [`SD3LoraLoaderMixin`] provides similar functions for [Stable Diffusion 3](https://huggingface.co/blog/sd3).
|
||||
- [`FluxLoraLoaderMixin`] provides similar functions for [Flux](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux).
|
||||
- [`CogVideoXLoraLoaderMixin`] provides similar functions for [CogVideoX](https://huggingface.co/docs/diffusers/main/en/api/pipelines/cogvideox).
|
||||
- [`Mochi1LoraLoaderMixin`] provides similar functions for [Mochi](https://huggingface.co/docs/diffusers/main/en/api/pipelines/mochi).
|
||||
- [`AmusedLoraLoaderMixin`] is for the [`AmusedPipeline`].
|
||||
- [`LoraBaseMixin`] provides a base class with several utility methods to fuse, unfuse, and unload LoRAs, and more.
|
||||
|
||||
@@ -38,6 +41,18 @@ To learn more about how to load LoRA weights, see the [LoRA](../../using-diffuse
|
||||
|
||||
[[autodoc]] loaders.lora_pipeline.SD3LoraLoaderMixin
|
||||
|
||||
## FluxLoraLoaderMixin
|
||||
|
||||
[[autodoc]] loaders.lora_pipeline.FluxLoraLoaderMixin
|
||||
|
||||
## CogVideoXLoraLoaderMixin
|
||||
|
||||
[[autodoc]] loaders.lora_pipeline.CogVideoXLoraLoaderMixin
|
||||
|
||||
## Mochi1LoraLoaderMixin
|
||||
|
||||
[[autodoc]] loaders.lora_pipeline.Mochi1LoraLoaderMixin
|
||||
|
||||
## AmusedLoraLoaderMixin
|
||||
|
||||
[[autodoc]] loaders.lora_pipeline.AmusedLoraLoaderMixin
|
||||
|
||||
@@ -237,5 +237,3 @@ with torch.no_grad():
|
||||
```
|
||||
|
||||
By selectively loading and unloading the models you need at a given stage and sharding the largest models across multiple GPUs, it is possible to run inference with large models on consumer GPUs.
|
||||
|
||||
This workflow is also compatible with LoRAs via [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`]. However, only LoRAs without text encoder components are currently supported in this workflow.
|
||||
|
||||
@@ -327,18 +327,12 @@ class LoraBaseMixin:
|
||||
tuple:
|
||||
A tuple indicating if `is_model_cpu_offload` or `is_sequential_cpu_offload` is True.
|
||||
"""
|
||||
from ..pipelines.pipeline_loading_utils import model_has_device_map
|
||||
|
||||
is_model_cpu_offload = False
|
||||
is_sequential_cpu_offload = False
|
||||
|
||||
if _pipeline is not None and _pipeline.hf_device_map is None:
|
||||
for _, component in _pipeline.components.items():
|
||||
if (
|
||||
isinstance(component, nn.Module)
|
||||
and hasattr(component, "_hf_hook")
|
||||
and not model_has_device_map(component)
|
||||
):
|
||||
if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
|
||||
if not is_model_cpu_offload:
|
||||
is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
|
||||
if not is_sequential_cpu_offload:
|
||||
|
||||
@@ -400,18 +400,12 @@ class UNet2DConditionLoadersMixin:
|
||||
tuple:
|
||||
A tuple indicating if `is_model_cpu_offload` or `is_sequential_cpu_offload` is True.
|
||||
"""
|
||||
from ..pipelines.pipeline_loading_utils import model_has_device_map
|
||||
|
||||
is_model_cpu_offload = False
|
||||
is_sequential_cpu_offload = False
|
||||
|
||||
if _pipeline is not None and _pipeline.hf_device_map is None:
|
||||
for _, component in _pipeline.components.items():
|
||||
if (
|
||||
isinstance(component, nn.Module)
|
||||
and hasattr(component, "_hf_hook")
|
||||
and not model_has_device_map(component)
|
||||
):
|
||||
if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
|
||||
if not is_model_cpu_offload:
|
||||
is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
|
||||
if not is_sequential_cpu_offload:
|
||||
|
||||
@@ -36,7 +36,6 @@ from ..utils import (
|
||||
deprecate,
|
||||
get_class_from_dynamic_module,
|
||||
is_accelerate_available,
|
||||
is_accelerate_version,
|
||||
is_peft_available,
|
||||
is_transformers_available,
|
||||
logging,
|
||||
@@ -969,18 +968,3 @@ def _get_ignore_patterns(
|
||||
)
|
||||
|
||||
return ignore_patterns
|
||||
|
||||
|
||||
def model_has_device_map(model):
|
||||
if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"):
|
||||
return False
|
||||
|
||||
# Check if the model has a device map that is not exclusively CPU
|
||||
# `device_map` can only contain CPU when a model has sharded checkpoints.
|
||||
# See here: https://github.com/huggingface/diffusers/blob/41e4779d988ead99e7acd78dc8e752de88777d0f/src/diffusers/models/modeling_utils.py#L883
|
||||
device_map = getattr(model, "hf_device_map", None)
|
||||
if device_map is not None:
|
||||
unique_devices = set(device_map.values())
|
||||
return len(unique_devices) > 1 or unique_devices != {"cpu"}
|
||||
|
||||
return False
|
||||
|
||||
@@ -84,7 +84,6 @@ from .pipeline_loading_utils import (
|
||||
_update_init_kwargs_with_connected_pipeline,
|
||||
load_sub_model,
|
||||
maybe_raise_or_warn,
|
||||
model_has_device_map,
|
||||
variant_compatible_siblings,
|
||||
warn_deprecated_model_variant,
|
||||
)
|
||||
@@ -407,16 +406,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
|
||||
return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.CpuOffload)
|
||||
|
||||
# device-mapped modules should not go through any device placements.
|
||||
device_mapped_components = [
|
||||
key for key, component in self.components.items() if model_has_device_map(component)
|
||||
]
|
||||
if device_mapped_components:
|
||||
raise ValueError(
|
||||
"The following pipeline components have been found to use a device map: "
|
||||
f"{device_mapped_components}. This is incompatible with explicitly setting the device using `to()`."
|
||||
)
|
||||
|
||||
# .to("cuda") would raise an error if the pipeline is sequentially offloaded, so we raise our own to make it clearer
|
||||
pipeline_is_sequentially_offloaded = any(
|
||||
module_is_sequentially_offloaded(module) for _, module in self.components.items()
|
||||
@@ -1019,16 +1008,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
|
||||
default to "cuda".
|
||||
"""
|
||||
# device-mapped modules should not go through any device placements.
|
||||
device_mapped_components = [
|
||||
key for key, component in self.components.items() if model_has_device_map(component)
|
||||
]
|
||||
if device_mapped_components:
|
||||
raise ValueError(
|
||||
"The following pipeline components have been found to use a device map: "
|
||||
f"{device_mapped_components}. This is incompatible with `enable_model_cpu_offload()`."
|
||||
)
|
||||
|
||||
is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1
|
||||
if is_pipeline_device_mapped:
|
||||
raise ValueError(
|
||||
@@ -1131,16 +1110,6 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
|
||||
The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
|
||||
default to "cuda".
|
||||
"""
|
||||
# device-mapped modules should not go through any device placements.
|
||||
device_mapped_components = [
|
||||
key for key, component in self.components.items() if model_has_device_map(component)
|
||||
]
|
||||
if device_mapped_components:
|
||||
raise ValueError(
|
||||
"The following pipeline components have been found to use a device map: "
|
||||
f"{device_mapped_components}. This is incompatible with `enable_sequential_cpu_offload()`."
|
||||
)
|
||||
|
||||
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||
from accelerate import cpu_offload
|
||||
else:
|
||||
|
||||
@@ -506,14 +506,9 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
model_dtypes = {key: component.dtype for key, component in components.items() if hasattr(component, "dtype")}
|
||||
self.assertTrue(all(dtype == torch.float16 for dtype in model_dtypes.values()))
|
||||
|
||||
@unittest.skip("Test currently not supported.")
|
||||
def test_sequential_cpu_offload_forward_pass(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test currently not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
|
||||
@nightly
|
||||
class AudioLDM2PipelineSlowTests(unittest.TestCase):
|
||||
|
||||
@@ -514,18 +514,6 @@ class StableDiffusionMultiControlNetPipelineFastTests(
|
||||
|
||||
assert image.shape == (4, 64, 64, 3)
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
|
||||
class StableDiffusionMultiControlNetOneModelPipelineFastTests(
|
||||
IPAdapterTesterMixin, PipelineTesterMixin, PipelineKarrasSchedulerTesterMixin, unittest.TestCase
|
||||
@@ -709,18 +697,6 @@ class StableDiffusionMultiControlNetOneModelPipelineFastTests(
|
||||
except NotImplementedError:
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
|
||||
@@ -389,18 +389,6 @@ class StableDiffusionMultiControlNetPipelineFastTests(
|
||||
except NotImplementedError:
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
|
||||
@@ -441,18 +441,6 @@ class MultiControlNetInpaintPipelineFastTests(
|
||||
except NotImplementedError:
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
|
||||
@@ -683,18 +683,6 @@ class StableDiffusionXLMultiControlNetPipelineFastTests(
|
||||
def test_save_load_optional_components(self):
|
||||
return self._test_save_load_optional_components()
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
|
||||
class StableDiffusionXLMultiControlNetOneModelPipelineFastTests(
|
||||
PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, SDXLOptionalComponentsTesterMixin, unittest.TestCase
|
||||
@@ -899,18 +887,6 @@ class StableDiffusionXLMultiControlNetOneModelPipelineFastTests(
|
||||
|
||||
self.assertTrue(np.abs(image_slice_without_neg_cond - image_slice_with_neg_cond).max() > 1e-2)
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
|
||||
@@ -8,11 +8,9 @@ from huggingface_hub import hf_hub_download
|
||||
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer, T5EncoderModel
|
||||
|
||||
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
|
||||
from diffusers.image_processor import VaeImageProcessor
|
||||
from diffusers.utils.testing_utils import (
|
||||
numpy_cosine_similarity_distance,
|
||||
require_big_gpu_with_torch_cuda,
|
||||
require_torch_multi_gpu,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
@@ -298,172 +296,3 @@ class FluxPipelineSlowTests(unittest.TestCase):
|
||||
max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten())
|
||||
|
||||
assert max_diff < 1e-4
|
||||
|
||||
@require_torch_multi_gpu
|
||||
@torch.no_grad()
|
||||
def test_flux_component_sharding(self):
|
||||
"""
|
||||
internal note: test was run on `audace`.
|
||||
"""
|
||||
|
||||
ckpt_id = "black-forest-labs/FLUX.1-dev"
|
||||
dtype = torch.bfloat16
|
||||
prompt = "a photo of a cat with tiger-like look"
|
||||
|
||||
pipeline = FluxPipeline.from_pretrained(
|
||||
ckpt_id,
|
||||
transformer=None,
|
||||
vae=None,
|
||||
device_map="balanced",
|
||||
max_memory={0: "16GB", 1: "16GB"},
|
||||
torch_dtype=dtype,
|
||||
)
|
||||
prompt_embeds, pooled_prompt_embeds, _ = pipeline.encode_prompt(
|
||||
prompt=prompt, prompt_2=None, max_sequence_length=512
|
||||
)
|
||||
|
||||
del pipeline.text_encoder
|
||||
del pipeline.text_encoder_2
|
||||
del pipeline.tokenizer
|
||||
del pipeline.tokenizer_2
|
||||
del pipeline
|
||||
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
transformer = FluxTransformer2DModel.from_pretrained(
|
||||
ckpt_id, subfolder="transformer", device_map="auto", max_memory={0: "16GB", 1: "16GB"}, torch_dtype=dtype
|
||||
)
|
||||
pipeline = FluxPipeline.from_pretrained(
|
||||
ckpt_id,
|
||||
text_encoder=None,
|
||||
text_encoder_2=None,
|
||||
tokenizer=None,
|
||||
tokenizer_2=None,
|
||||
vae=None,
|
||||
transformer=transformer,
|
||||
torch_dtype=dtype,
|
||||
)
|
||||
|
||||
height, width = 768, 1360
|
||||
# No need to wrap it up under `torch.no_grad()` as pipeline call method
|
||||
# is already wrapped under that.
|
||||
latents = pipeline(
|
||||
prompt_embeds=prompt_embeds,
|
||||
pooled_prompt_embeds=pooled_prompt_embeds,
|
||||
num_inference_steps=10,
|
||||
guidance_scale=3.5,
|
||||
height=height,
|
||||
width=width,
|
||||
output_type="latent",
|
||||
generator=torch.manual_seed(0),
|
||||
).images
|
||||
latent_slice = latents[0, :3, :3].flatten().float().cpu().numpy()
|
||||
expected_slice = np.array([-0.377, -0.3008, -0.5117, -0.252, 0.0615, -0.3477, -0.1309, -0.1914, 0.1533])
|
||||
|
||||
assert numpy_cosine_similarity_distance(latent_slice, expected_slice) < 1e-4
|
||||
|
||||
del pipeline.transformer
|
||||
del pipeline
|
||||
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=dtype).to(torch_device)
|
||||
vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)
|
||||
image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)
|
||||
|
||||
latents = FluxPipeline._unpack_latents(latents, height, width, vae_scale_factor)
|
||||
latents = (latents / vae.config.scaling_factor) + vae.config.shift_factor
|
||||
|
||||
image = vae.decode(latents, return_dict=False)[0]
|
||||
image = image_processor.postprocess(image, output_type="np")
|
||||
image_slice = image[0, :3, :3, -1].flatten()
|
||||
expected_slice = np.array([0.127, 0.1113, 0.1055, 0.1172, 0.1172, 0.1074, 0.1191, 0.1191, 0.1152])
|
||||
|
||||
assert numpy_cosine_similarity_distance(image_slice, expected_slice) < 1e-4
|
||||
|
||||
@require_torch_multi_gpu
|
||||
@torch.no_grad()
|
||||
def test_flux_component_sharding_with_lora(self):
|
||||
"""
|
||||
internal note: test was run on `audace`.
|
||||
"""
|
||||
|
||||
ckpt_id = "black-forest-labs/FLUX.1-dev"
|
||||
dtype = torch.bfloat16
|
||||
prompt = "jon snow eating pizza."
|
||||
|
||||
pipeline = FluxPipeline.from_pretrained(
|
||||
ckpt_id,
|
||||
transformer=None,
|
||||
vae=None,
|
||||
device_map="balanced",
|
||||
max_memory={0: "16GB", 1: "16GB"},
|
||||
torch_dtype=dtype,
|
||||
)
|
||||
prompt_embeds, pooled_prompt_embeds, _ = pipeline.encode_prompt(
|
||||
prompt=prompt, prompt_2=None, max_sequence_length=512
|
||||
)
|
||||
|
||||
del pipeline.text_encoder
|
||||
del pipeline.text_encoder_2
|
||||
del pipeline.tokenizer
|
||||
del pipeline.tokenizer_2
|
||||
del pipeline
|
||||
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
transformer = FluxTransformer2DModel.from_pretrained(
|
||||
ckpt_id, subfolder="transformer", device_map="auto", max_memory={0: "16GB", 1: "16GB"}, torch_dtype=dtype
|
||||
)
|
||||
pipeline = FluxPipeline.from_pretrained(
|
||||
ckpt_id,
|
||||
text_encoder=None,
|
||||
text_encoder_2=None,
|
||||
tokenizer=None,
|
||||
tokenizer_2=None,
|
||||
vae=None,
|
||||
transformer=transformer,
|
||||
torch_dtype=dtype,
|
||||
)
|
||||
pipeline.load_lora_weights("TheLastBen/Jon_Snow_Flux_LoRA", weight_name="jon_snow.safetensors")
|
||||
|
||||
height, width = 768, 1360
|
||||
# No need to wrap it up under `torch.no_grad()` as pipeline call method
|
||||
# is already wrapped under that.
|
||||
latents = pipeline(
|
||||
prompt_embeds=prompt_embeds,
|
||||
pooled_prompt_embeds=pooled_prompt_embeds,
|
||||
num_inference_steps=10,
|
||||
guidance_scale=3.5,
|
||||
height=height,
|
||||
width=width,
|
||||
output_type="latent",
|
||||
generator=torch.manual_seed(0),
|
||||
).images
|
||||
latent_slice = latents[0, :3, :3].flatten().float().cpu().numpy()
|
||||
expected_slice = np.array([-0.6523, -0.4961, -0.9141, -0.5, -0.2129, -0.6914, -0.375, -0.5664, -0.1699])
|
||||
|
||||
assert numpy_cosine_similarity_distance(latent_slice, expected_slice) < 1e-4
|
||||
|
||||
del pipeline.transformer
|
||||
del pipeline
|
||||
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=dtype).to(torch_device)
|
||||
vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)
|
||||
image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)
|
||||
|
||||
latents = FluxPipeline._unpack_latents(latents, height, width, vae_scale_factor)
|
||||
latents = (latents / vae.config.scaling_factor) + vae.config.shift_factor
|
||||
|
||||
image = vae.decode(latents, return_dict=False)[0]
|
||||
image = image_processor.postprocess(image, output_type="np")
|
||||
image_slice = image[0, :3, :3, -1].flatten()
|
||||
expected_slice = np.array([0.1211, 0.1094, 0.1035, 0.1094, 0.1113, 0.1074, 0.1133, 0.1133, 0.1094])
|
||||
|
||||
assert numpy_cosine_similarity_distance(image_slice, expected_slice) < 1e-4
|
||||
|
||||
@@ -139,18 +139,6 @@ class KandinskyPipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase)
|
||||
def test_dict_tuple_outputs_equivalent(self):
|
||||
super().test_dict_tuple_outputs_equivalent(expected_max_difference=5e-4)
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
|
||||
class KandinskyPipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
pipeline_class = KandinskyImg2ImgCombinedPipeline
|
||||
@@ -260,18 +248,6 @@ class KandinskyPipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.Te
|
||||
def test_save_load_optional_components(self):
|
||||
super().test_save_load_optional_components(expected_max_difference=5e-4)
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
|
||||
class KandinskyPipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
pipeline_class = KandinskyInpaintCombinedPipeline
|
||||
@@ -387,15 +363,3 @@ class KandinskyPipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.Te
|
||||
|
||||
def test_save_load_local(self):
|
||||
super().test_save_load_local(expected_max_difference=5e-3)
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
@@ -30,16 +28,11 @@ from transformers import (
|
||||
)
|
||||
|
||||
from diffusers import KandinskyPriorPipeline, PriorTransformer, UnCLIPScheduler
|
||||
from diffusers.models.modeling_utils import ModelMixin
|
||||
from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
|
||||
from diffusers.utils.testing_utils import enable_full_determinism, is_accelerate_available, skip_mps, torch_device
|
||||
from diffusers.utils.testing_utils import enable_full_determinism, skip_mps, torch_device
|
||||
|
||||
from ..test_pipelines_common import PipelineTesterMixin
|
||||
|
||||
|
||||
if is_accelerate_available():
|
||||
from accelerate.utils import compute_module_sizes
|
||||
|
||||
enable_full_determinism()
|
||||
|
||||
|
||||
@@ -243,31 +236,3 @@ class KandinskyPriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
test_max_difference=test_max_difference,
|
||||
test_mean_pixel_difference=test_mean_pixel_difference,
|
||||
)
|
||||
|
||||
# It needs a different sharding ratio than the standard 0.75. So, we override it.
|
||||
def test_sharded_components_can_be_device_placed(self):
|
||||
components = self.get_dummy_components()
|
||||
|
||||
component_selected = None
|
||||
for component_name in components:
|
||||
if isinstance(components[component_name], ModelMixin) and hasattr(
|
||||
components[component_name], "load_config"
|
||||
):
|
||||
component_to_be_sharded = components[component_name]
|
||||
component_cls = component_to_be_sharded.__class__
|
||||
component_selected = component_name
|
||||
break
|
||||
|
||||
assert component_selected, "No component selected that can be sharded."
|
||||
|
||||
model_size = compute_module_sizes(component_to_be_sharded)[""]
|
||||
max_shard_size = int((model_size * 0.45) / (2**10))
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))
|
||||
|
||||
loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
|
||||
_ = components.pop(component_selected)
|
||||
components.update({component_selected: loaded_sharded_component})
|
||||
_ = self.pipeline_class(**components).to(torch_device)
|
||||
|
||||
@@ -159,18 +159,6 @@ class KandinskyV22PipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCa
|
||||
def test_callback_cfg(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
|
||||
class KandinskyV22PipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
pipeline_class = KandinskyV22Img2ImgCombinedPipeline
|
||||
@@ -293,18 +281,6 @@ class KandinskyV22PipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest
|
||||
def test_callback_cfg(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
|
||||
class KandinskyV22PipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
pipeline_class = KandinskyV22InpaintCombinedPipeline
|
||||
@@ -428,15 +404,3 @@ class KandinskyV22PipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest
|
||||
|
||||
def test_callback_cfg(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
# limitations under the License.
|
||||
|
||||
import inspect
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
@@ -31,17 +29,11 @@ from transformers import (
|
||||
)
|
||||
|
||||
from diffusers import KandinskyV22PriorPipeline, PriorTransformer, UnCLIPScheduler
|
||||
from diffusers.models.modeling_utils import ModelMixin
|
||||
from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
|
||||
from diffusers.utils.testing_utils import enable_full_determinism, is_accelerate_available, skip_mps, torch_device
|
||||
from diffusers.utils.testing_utils import enable_full_determinism, skip_mps, torch_device
|
||||
|
||||
from ..test_pipelines_common import PipelineTesterMixin
|
||||
|
||||
|
||||
if is_accelerate_available():
|
||||
from accelerate.utils import compute_module_sizes
|
||||
|
||||
|
||||
enable_full_determinism()
|
||||
|
||||
|
||||
@@ -285,31 +277,3 @@ class KandinskyV22PriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
|
||||
|
||||
output = pipe(**inputs)[0]
|
||||
assert output.abs().sum() == 0
|
||||
|
||||
# It needs a different sharding ratio than the standard 0.75. So, we override it.
|
||||
def test_sharded_components_can_be_device_placed(self):
|
||||
components = self.get_dummy_components()
|
||||
|
||||
component_selected = None
|
||||
for component_name in components:
|
||||
if isinstance(components[component_name], ModelMixin) and hasattr(
|
||||
components[component_name], "load_config"
|
||||
):
|
||||
component_to_be_sharded = components[component_name]
|
||||
component_cls = component_to_be_sharded.__class__
|
||||
component_selected = component_name
|
||||
break
|
||||
|
||||
assert component_selected, "No component selected that can be sharded."
|
||||
|
||||
model_size = compute_module_sizes(component_to_be_sharded)[""]
|
||||
max_shard_size = int((model_size * 0.45) / (2**10))
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))
|
||||
|
||||
loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
|
||||
_ = components.pop(component_selected)
|
||||
components.update({component_selected: loaded_sharded_component})
|
||||
_ = self.pipeline_class(**components).to(torch_device)
|
||||
|
||||
@@ -13,9 +13,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import random
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
@@ -32,12 +30,9 @@ from transformers import (
|
||||
)
|
||||
|
||||
from diffusers import KandinskyV22PriorEmb2EmbPipeline, PriorTransformer, UnCLIPScheduler
|
||||
from diffusers.models.modeling_utils import ModelMixin
|
||||
from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
|
||||
from diffusers.utils.testing_utils import (
|
||||
enable_full_determinism,
|
||||
floats_tensor,
|
||||
is_accelerate_available,
|
||||
skip_mps,
|
||||
torch_device,
|
||||
)
|
||||
@@ -45,10 +40,6 @@ from diffusers.utils.testing_utils import (
|
||||
from ..test_pipelines_common import PipelineTesterMixin
|
||||
|
||||
|
||||
if is_accelerate_available():
|
||||
from accelerate.utils import compute_module_sizes
|
||||
|
||||
|
||||
enable_full_determinism()
|
||||
|
||||
|
||||
@@ -249,31 +240,3 @@ class KandinskyV22PriorEmb2EmbPipelineFastTests(PipelineTesterMixin, unittest.Te
|
||||
test_max_difference=test_max_difference,
|
||||
test_mean_pixel_difference=test_mean_pixel_difference,
|
||||
)
|
||||
|
||||
# It needs a different sharding ratio than the standard 0.75. So, we override it.
|
||||
def test_sharded_components_can_be_device_placed(self):
|
||||
components = self.get_dummy_components()
|
||||
|
||||
component_selected = None
|
||||
for component_name in components:
|
||||
if isinstance(components[component_name], ModelMixin) and hasattr(
|
||||
components[component_name], "load_config"
|
||||
):
|
||||
component_to_be_sharded = components[component_name]
|
||||
component_cls = component_to_be_sharded.__class__
|
||||
component_selected = component_name
|
||||
break
|
||||
|
||||
assert component_selected, "No component selected that can be sharded."
|
||||
|
||||
model_size = compute_module_sizes(component_to_be_sharded)[""]
|
||||
max_shard_size = int((model_size * 0.45) / (2**10))
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))
|
||||
|
||||
loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
|
||||
_ = components.pop(component_selected)
|
||||
components.update({component_selected: loaded_sharded_component})
|
||||
_ = self.pipeline_class(**components).to(torch_device)
|
||||
|
||||
@@ -404,10 +404,6 @@ class MusicLDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
model_dtypes = {key: component.dtype for key, component in components.items() if hasattr(component, "dtype")}
|
||||
self.assertTrue(all(dtype == torch.float16 for dtype in model_dtypes.values()))
|
||||
|
||||
@unittest.skip("Test currently not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
|
||||
@nightly
|
||||
@require_torch_gpu
|
||||
|
||||
@@ -279,15 +279,3 @@ class StableCascadeCombinedPipelineFastTests(PipelineTesterMixin, unittest.TestC
|
||||
)
|
||||
|
||||
assert np.abs(output_prompt.images - output_prompt_embeds.images).max() < 1e-5
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@@ -593,18 +593,6 @@ class StableDiffusionMultiAdapterPipelineFastTests(AdapterTests, PipelineTesterM
|
||||
if test_mean_pixel_difference:
|
||||
assert_mean_pixel_difference(output_batch[0][0], output[0][0])
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
|
||||
@@ -642,6 +642,9 @@ class StableDiffusionXLMultiAdapterPipelineFastTests(
|
||||
assert image.shape == (1, 64, 64, 3)
|
||||
expected_slice = np.array([0.5313, 0.5375, 0.4942, 0.5021, 0.6142, 0.4968, 0.5434, 0.5311, 0.5448])
|
||||
|
||||
debug = [str(round(i, 4)) for i in image_slice.flatten().tolist()]
|
||||
print(",".join(debug))
|
||||
|
||||
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
|
||||
|
||||
def test_adapter_sdxl_lcm_custom_timesteps(self):
|
||||
@@ -664,16 +667,7 @@ class StableDiffusionXLMultiAdapterPipelineFastTests(
|
||||
assert image.shape == (1, 64, 64, 3)
|
||||
expected_slice = np.array([0.5313, 0.5375, 0.4942, 0.5021, 0.6142, 0.4968, 0.5434, 0.5311, 0.5448])
|
||||
|
||||
debug = [str(round(i, 4)) for i in image_slice.flatten().tolist()]
|
||||
print(",".join(debug))
|
||||
|
||||
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
import gc
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
import torch
|
||||
@@ -14,17 +12,8 @@ from diffusers import (
|
||||
StableUnCLIPPipeline,
|
||||
UNet2DConditionModel,
|
||||
)
|
||||
from diffusers.models.modeling_utils import ModelMixin
|
||||
from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
|
||||
from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
|
||||
from diffusers.utils.testing_utils import (
|
||||
enable_full_determinism,
|
||||
is_accelerate_available,
|
||||
load_numpy,
|
||||
nightly,
|
||||
require_torch_gpu,
|
||||
torch_device,
|
||||
)
|
||||
from diffusers.utils.testing_utils import enable_full_determinism, load_numpy, nightly, require_torch_gpu, torch_device
|
||||
|
||||
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
|
||||
from ..test_pipelines_common import (
|
||||
@@ -35,10 +24,6 @@ from ..test_pipelines_common import (
|
||||
)
|
||||
|
||||
|
||||
if is_accelerate_available():
|
||||
from accelerate.utils import compute_module_sizes
|
||||
|
||||
|
||||
enable_full_determinism()
|
||||
|
||||
|
||||
@@ -199,46 +184,6 @@ class StableUnCLIPPipelineFastTests(
|
||||
def test_inference_batch_single_identical(self):
|
||||
self._test_inference_batch_single_identical(expected_max_diff=1e-3)
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
# It needs a different sharding ratio than the standard 0.75. So, we override it.
|
||||
def test_sharded_components_can_be_device_placed(self):
|
||||
components = self.get_dummy_components()
|
||||
|
||||
component_selected = None
|
||||
for component_name in components:
|
||||
if isinstance(components[component_name], ModelMixin) and hasattr(
|
||||
components[component_name], "load_config"
|
||||
):
|
||||
component_to_be_sharded = components[component_name]
|
||||
component_cls = component_to_be_sharded.__class__
|
||||
component_selected = component_name
|
||||
break
|
||||
|
||||
assert component_selected, "No component selected that can be sharded."
|
||||
|
||||
model_size = compute_module_sizes(component_to_be_sharded)[""]
|
||||
max_shard_size = int((model_size * 0.45) / (2**10))
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))
|
||||
|
||||
loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
|
||||
_ = components.pop(component_selected)
|
||||
components.update({component_selected: loaded_sharded_component})
|
||||
_ = self.pipeline_class(**components).to(torch_device)
|
||||
|
||||
|
||||
@nightly
|
||||
@require_torch_gpu
|
||||
|
||||
@@ -205,18 +205,6 @@ class StableUnCLIPImg2ImgPipelineFastTests(
|
||||
def test_xformers_attention_forwardGenerator_pass(self):
|
||||
self._test_xformers_attention_forwardGenerator_pass(test_max_difference=False)
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
|
||||
@nightly
|
||||
@require_torch_gpu
|
||||
|
||||
@@ -41,14 +41,10 @@ from diffusers.utils import logging
|
||||
from diffusers.utils.import_utils import is_xformers_available
|
||||
from diffusers.utils.testing_utils import (
|
||||
CaptureLogger,
|
||||
is_accelerate_available,
|
||||
nightly,
|
||||
require_accelerate_version_greater,
|
||||
require_accelerator,
|
||||
require_torch,
|
||||
require_torch_multi_gpu,
|
||||
skip_mps,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
@@ -65,10 +61,6 @@ from ..models.unets.test_models_unet_2d_condition import (
|
||||
from ..others.test_utils import TOKEN, USER, is_staging_test
|
||||
|
||||
|
||||
if is_accelerate_available():
|
||||
from accelerate.utils import compute_module_sizes
|
||||
|
||||
|
||||
def to_np(tensor):
|
||||
if isinstance(tensor, torch.Tensor):
|
||||
tensor = tensor.detach().cpu().numpy()
|
||||
@@ -1910,78 +1902,6 @@ class PipelineTesterMixin:
|
||||
)
|
||||
)
|
||||
|
||||
@require_torch_multi_gpu
|
||||
@slow
|
||||
@nightly
|
||||
def test_calling_to_raises_error_device_mapped_components(self, safe_serialization=True):
|
||||
components = self.get_dummy_components()
|
||||
pipe = self.pipeline_class(**components)
|
||||
max_model_size = max(
|
||||
compute_module_sizes(module)[""]
|
||||
for _, module in pipe.components.items()
|
||||
if isinstance(module, torch.nn.Module)
|
||||
)
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
pipe.save_pretrained(tmpdir, safe_serialization=safe_serialization)
|
||||
max_memory = {0: max_model_size, 1: max_model_size}
|
||||
loaded_pipe = self.pipeline_class.from_pretrained(tmpdir, device_map="balanced", max_memory=max_memory)
|
||||
|
||||
with self.assertRaises(ValueError) as err_context:
|
||||
loaded_pipe.to(torch_device)
|
||||
|
||||
self.assertTrue(
|
||||
"The following pipeline components have been found" in str(err_context.exception)
|
||||
and "This is incompatible with explicitly setting the device using `to()`" in str(err_context.exception)
|
||||
)
|
||||
|
||||
@require_torch_multi_gpu
|
||||
@slow
|
||||
@nightly
|
||||
def test_calling_mco_raises_error_device_mapped_components(self, safe_serialization=True):
|
||||
components = self.get_dummy_components()
|
||||
pipe = self.pipeline_class(**components)
|
||||
max_model_size = max(
|
||||
compute_module_sizes(module)[""]
|
||||
for _, module in pipe.components.items()
|
||||
if isinstance(module, torch.nn.Module)
|
||||
)
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
pipe.save_pretrained(tmpdir, safe_serialization=safe_serialization)
|
||||
max_memory = {0: max_model_size, 1: max_model_size}
|
||||
loaded_pipe = self.pipeline_class.from_pretrained(tmpdir, device_map="balanced", max_memory=max_memory)
|
||||
|
||||
with self.assertRaises(ValueError) as err_context:
|
||||
loaded_pipe.enable_model_cpu_offload()
|
||||
|
||||
self.assertTrue(
|
||||
"The following pipeline components have been found" in str(err_context.exception)
|
||||
and "This is incompatible with `enable_model_cpu_offload()`" in str(err_context.exception)
|
||||
)
|
||||
|
||||
@require_torch_multi_gpu
|
||||
@slow
|
||||
@nightly
|
||||
def test_calling_sco_raises_error_device_mapped_components(self, safe_serialization=True):
|
||||
components = self.get_dummy_components()
|
||||
pipe = self.pipeline_class(**components)
|
||||
max_model_size = max(
|
||||
compute_module_sizes(module)[""]
|
||||
for _, module in pipe.components.items()
|
||||
if isinstance(module, torch.nn.Module)
|
||||
)
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
pipe.save_pretrained(tmpdir, safe_serialization=safe_serialization)
|
||||
max_memory = {0: max_model_size, 1: max_model_size}
|
||||
loaded_pipe = self.pipeline_class.from_pretrained(tmpdir, device_map="balanced", max_memory=max_memory)
|
||||
|
||||
with self.assertRaises(ValueError) as err_context:
|
||||
loaded_pipe.enable_sequential_cpu_offload()
|
||||
|
||||
self.assertTrue(
|
||||
"The following pipeline components have been found" in str(err_context.exception)
|
||||
and "This is incompatible with `enable_sequential_cpu_offload()`" in str(err_context.exception)
|
||||
)
|
||||
|
||||
|
||||
@is_staging_test
|
||||
class PipelinePushToHubTester(unittest.TestCase):
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
# limitations under the License.
|
||||
|
||||
import gc
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
@@ -23,12 +21,9 @@ import torch
|
||||
from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer
|
||||
|
||||
from diffusers import PriorTransformer, UnCLIPPipeline, UnCLIPScheduler, UNet2DConditionModel, UNet2DModel
|
||||
from diffusers.models.modeling_utils import ModelMixin
|
||||
from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel
|
||||
from diffusers.utils import SAFE_WEIGHTS_INDEX_NAME
|
||||
from diffusers.utils.testing_utils import (
|
||||
enable_full_determinism,
|
||||
is_accelerate_available,
|
||||
load_numpy,
|
||||
nightly,
|
||||
require_torch_gpu,
|
||||
@@ -40,9 +35,6 @@ from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
|
||||
from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
|
||||
|
||||
|
||||
if is_accelerate_available():
|
||||
from accelerate.utils import compute_module_sizes
|
||||
|
||||
enable_full_determinism()
|
||||
|
||||
|
||||
@@ -426,34 +418,6 @@ class UnCLIPPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
def test_float16_inference(self):
|
||||
super().test_float16_inference(expected_max_diff=1.0)
|
||||
|
||||
# It needs a different sharding ratio than the standard 0.75. So, we override it.
|
||||
def test_sharded_components_can_be_device_placed(self):
|
||||
components = self.get_dummy_components()
|
||||
|
||||
component_selected = None
|
||||
for component_name in components:
|
||||
if isinstance(components[component_name], ModelMixin) and hasattr(
|
||||
components[component_name], "load_config"
|
||||
):
|
||||
component_to_be_sharded = components[component_name]
|
||||
component_cls = component_to_be_sharded.__class__
|
||||
component_selected = component_name
|
||||
break
|
||||
|
||||
assert component_selected, "No component selected that can be sharded."
|
||||
|
||||
model_size = compute_module_sizes(component_to_be_sharded)[""]
|
||||
max_shard_size = int((model_size * 0.45) / (2**10))
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
component_to_be_sharded.cpu().save_pretrained(tmp_dir, max_shard_size=f"{max_shard_size}KB")
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dir, SAFE_WEIGHTS_INDEX_NAME)))
|
||||
|
||||
loaded_sharded_component = component_cls.from_pretrained(tmp_dir)
|
||||
_ = components.pop(component_selected)
|
||||
components.update({component_selected: loaded_sharded_component})
|
||||
_ = self.pipeline_class(**components).to(torch_device)
|
||||
|
||||
|
||||
@nightly
|
||||
class UnCLIPPipelineCPUIntegrationTests(unittest.TestCase):
|
||||
|
||||
@@ -576,15 +576,6 @@ class UniDiffuserPipelineFastTests(
|
||||
expected_text_prefix = '" This This'
|
||||
assert text[0][: len(expected_text_prefix)] == expected_text_prefix
|
||||
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
super().test_calling_mco_raises_error_device_mapped_components(safe_serialization=False)
|
||||
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
super().test_calling_to_raises_error_device_mapped_components(safe_serialization=False)
|
||||
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
super().test_calling_sco_raises_error_device_mapped_components(safe_serialization=False)
|
||||
|
||||
|
||||
@nightly
|
||||
@require_torch_gpu
|
||||
|
||||
@@ -237,15 +237,3 @@ class WuerstchenCombinedPipelineFastTests(PipelineTesterMixin, unittest.TestCase
|
||||
|
||||
def test_callback_cfg(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_mco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_to_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Test not supported.")
|
||||
def test_calling_sco_raises_error_device_mapped_components(self):
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user