update

2024-02-13 03:47:00 +00:00 · 2024-02-12 18:22:36 +00:00 · 2024-02-12 16:10:28 +00:00 · 2024-02-12 14:24:24 +00:00
9 changed files with 63 additions and 93 deletions
@@ -56,60 +56,6 @@ pipeline = DiffusionPipeline.from_pretrained(
 )
 ```

-### Load from a local file
-
-Community pipelines can also be loaded from a local file if you pass a file path instead. The path to the passed directory must contain a `pipeline.py` file that contains the pipeline class in order to successfully load it.
-
-```py
-pipeline = DiffusionPipeline.from_pretrained(
-    "runwayml/stable-diffusion-v1-5",
-    custom_pipeline="./path/to/pipeline_directory/",
-    clip_model=clip_model,
-    feature_extractor=feature_extractor,
-    use_safetensors=True,
-)
-```
-
-### Load from a specific version
-
-By default, community pipelines are loaded from the latest stable version of Diffusers. To load a community pipeline from another version, use the `custom_revision` parameter.
-
-<hfoptions id="version">
-<hfoption id="main">
-
-For example, to load from the `main` branch:
-
-```py
-pipeline = DiffusionPipeline.from_pretrained(
-    "runwayml/stable-diffusion-v1-5",
-    custom_pipeline="clip_guided_stable_diffusion",
-    custom_revision="main",
-    clip_model=clip_model,
-    feature_extractor=feature_extractor,
-    use_safetensors=True,
-)
-```
-
-</hfoption>
-<hfoption id="older version">
-
-For example, to load from a previous version of Diffusers like `v0.25.0`:
-
-```py
-pipeline = DiffusionPipeline.from_pretrained(
-    "runwayml/stable-diffusion-v1-5",
-    custom_pipeline="clip_guided_stable_diffusion",
-    custom_revision="v0.25.0",
-    clip_model=clip_model,
-    feature_extractor=feature_extractor,
-    use_safetensors=True,
-)
-```
-
-</hfoption>
-</hfoptions>
-
-
 For more information about community pipelines, take a look at the [Community pipelines](custom_pipeline_examples) guide for how to use them and if you're interested in adding a community pipeline check out the [How to contribute a community pipeline](contribute_pipeline) guide!

 ## Community components
@@ -376,14 +376,18 @@ After training, LoRA weights can be loaded very easily into the original pipelin
 load the original pipeline:

 ```python
-from diffusers import DiffusionPipeline
-pipe = DiffusionPipeline.from_pretrained("base-model-name").to("cuda")
+from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
+import torch
+
+pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
+pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+pipe.to("cuda")
 ```

-Next, we can load the adapter layers into the pipeline with the [`load_lora_weights` function](https://huggingface.co/docs/diffusers/main/en/using-diffusers/loading_adapters#lora).
+Next, we can load the adapter layers into the UNet with the [`load_attn_procs` function](https://huggingface.co/docs/diffusers/api/loaders#diffusers.loaders.UNet2DConditionLoadersMixin.load_attn_procs).

 ```python
-pipe.load_lora_weights("path-to-the-lora-checkpoint")
+pipe.unet.load_attn_procs("patrickvonplaten/lora_dreambooth_dog_example")
 ```

 Finally, we can run the model in inference.
@@ -67,8 +67,8 @@ DATASET_NAME_MAPPING = {
 def save_model_card(
    args,
    repo_id: str,
-    images: list = None,
-    repo_folder: str = None,
+    images=None,
+    repo_folder=None,
 ):
    img_str = ""
    if len(images) > 0:
@@ -56,9 +56,7 @@ check_min_version("0.27.0.dev0")
 logger = get_logger(__name__, log_level="INFO")


-def save_model_card(
-    repo_id: str, images: list = None, base_model: str = None, dataset_name: str = None, repo_folder: str = None
-):
+def save_model_card(repo_id: str, images=None, base_model=str, dataset_name=str, repo_folder=None):
    img_str = ""
    for i, image in enumerate(images):
        image.save(os.path.join(repo_folder, f"image_{i}.png"))
@@ -66,12 +66,12 @@ DATASET_NAME_MAPPING = {

 def save_model_card(
    repo_id: str,
-    images: list = None,
-    validation_prompt: str = None,
-    base_model: str = None,
-    dataset_name: str = None,
-    repo_folder: str = None,
-    vae_path: str = None,
+    images=None,
+    validation_prompt=None,
+    base_model=str,
+    dataset_name=str,
+    repo_folder=None,
+    vae_path=None,
 ):
    img_str = ""
    for i, image in enumerate(images):
@@ -38,6 +38,9 @@ class FromOriginalVAEMixin:
                    - A link to the `.ckpt` file (for example
                      `"https://huggingface.co/<repo_id>/blob/main/<path_to_file>.ckpt"`) on the Hub.
                    - A path to a *file* containing all pipeline weights.
+            config_file (`str`, *optional*):
+                Filepath to the configuration YAML file associated with the model. If not provided it will default to:
+                https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml
            torch_dtype (`str` or `torch.dtype`, *optional*):
                Override the default `torch.dtype` and load the model with another dtype. If `"auto"` is passed, the
                dtype is automatically derived from the model's weights.
@@ -65,6 +68,13 @@ class FromOriginalVAEMixin:
            image_size (`int`, *optional*, defaults to 512):
                The image size the model was trained on. Use 512 for all Stable Diffusion v1 models and the Stable
                Diffusion v2 base model. Use 768 for Stable Diffusion v2.
+            scaling_factor (`float`, *optional*, defaults to 0.18215):
+                The component-wise standard deviation of the trained latent space computed using the first batch of the
+                training set. This is used to scale the latent space to have unit variance when training the diffusion
+                model. The latents are scaled with the formula `z = z * scaling_factor` before being passed to the
+                diffusion model. When decoding, the latents are scaled back to the original scale with the formula: `z
+                = 1 / scaling_factor * z`. For more details, refer to sections 4.3.2 and D.1 of the [High-Resolution
+                Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper.
            use_safetensors (`bool`, *optional*, defaults to `None`):
                If set to `None`, the safetensors weights are downloaded if they're available **and** if the
                safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors
@@ -92,6 +102,7 @@ class FromOriginalVAEMixin:
        """

        original_config_file = kwargs.pop("original_config_file", None)
+        config_file = kwargs.pop("config_file", None)
        resume_download = kwargs.pop("resume_download", False)
        force_download = kwargs.pop("force_download", False)
        proxies = kwargs.pop("proxies", None)
@@ -103,6 +114,13 @@ class FromOriginalVAEMixin:
        use_safetensors = kwargs.pop("use_safetensors", True)

        class_name = cls.__name__
+
+        if (config_file is not None) and (original_config_file is not None):
+            raise ValueError(
+                "You cannot pass both `config_file` and `original_config_file` to `from_single_file`. Please use only one of these arguments."
+            )
+
+        original_config_file = original_config_file or config_file
        original_config, checkpoint = fetch_ldm_config_and_checkpoint(
            pretrained_model_link_or_path=pretrained_model_link_or_path,
            class_name=class_name,
@@ -118,7 +136,10 @@ class FromOriginalVAEMixin:
        )

        image_size = kwargs.pop("image_size", None)
-        component = create_diffusers_vae_model_from_ldm(class_name, original_config, checkpoint, image_size=image_size)
+        scaling_factor = kwargs.pop("scaling_factor", None)
+        component = create_diffusers_vae_model_from_ldm(
+            class_name, original_config, checkpoint, image_size=image_size, scaling_factor=scaling_factor
+        )
        vae = component["vae"]
        if torch_dtype is not None:
            vae = vae.to(torch_dtype)
@@ -175,6 +175,7 @@ DIFFUSERS_TO_LDM_MAPPING = {
 }

 LDM_VAE_KEY = "first_stage_model."
+LDM_VAE_DEFAULT_SCALING_FACTOR = 0.18215
 LDM_UNET_KEY = "model.diffusion_model."
 LDM_CONTROLNET_KEY = "control_model."
 LDM_CLIP_PREFIX_TO_REMOVE = ["cond_stage_model.transformer.", "conditioner.embedders.0.transformer."]
@@ -518,7 +519,10 @@ def create_vae_diffusers_config(original_config, image_size, scaling_factor=None
    Creates a config for the diffusers based on the config of the LDM model.
    """
    vae_params = original_config["model"]["params"]["first_stage_config"]["params"]["ddconfig"]
-    scaling_factor = scaling_factor or original_config["model"]["params"]["scale_factor"]
+    if scaling_factor is None and "scale_factor" in original_config["model"]["params"]:
+        scaling_factor = original_config["model"]["params"]["scale_factor"]
+    elif scaling_factor is None:
+        scaling_factor = LDM_VAE_DEFAULT_SCALING_FACTOR

    block_out_channels = [vae_params["ch"] * mult for mult in vae_params["ch_mult"]]
    down_block_types = ["DownEncoderBlock2D"] * len(block_out_channels)
@@ -1173,7 +1177,7 @@ def create_diffusers_unet_model_from_ldm(


 def create_diffusers_vae_model_from_ldm(
-    pipeline_class_name, original_config, checkpoint, image_size=None, scaling_factor=0.18125
+    pipeline_class_name, original_config, checkpoint, image_size=None, scaling_factor=None
 ):
    # import here to avoid circular imports
    from ..models import AutoencoderKL
@@ -981,9 +981,10 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
                allowed by Git.
-            custom_revision (`str`, *optional*):
+            custom_revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id similar to
-                `revision` when loading a custom pipeline from the Hub. Defaults to the latest stable 🤗 Diffusers version.
+                `revision` when loading a custom pipeline from the Hub. It can be a 🤗 Diffusers version when loading a
+                custom pipeline from GitHub, otherwise it defaults to `"main"` when loading from the Hub.
            mirror (`str`, *optional*):
                Mirror source to resolve accessibility issues if you’re downloading a model in China. We do not
                guarantee the timeliness or safety of the source, and you should refer to the mirror site for more
@@ -27,13 +27,7 @@ from diffusers import (
    PixArtAlphaPipeline,
    Transformer2DModel,
 )
-from diffusers.utils.testing_utils import (
-    enable_full_determinism,
-    numpy_cosine_similarity_distance,
-    require_torch_gpu,
-    slow,
-    torch_device,
-)
+from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow, torch_device

 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
 from ..test_pipelines_common import PipelineTesterMixin, to_np
@@ -338,35 +332,37 @@ class PixArtAlphaPipelineIntegrationTests(unittest.TestCase):
        torch.cuda.empty_cache()

    def test_pixart_1024(self):
-        generator = torch.Generator("cpu").manual_seed(0)
+        generator = torch.manual_seed(0)

        pipe = PixArtAlphaPipeline.from_pretrained(self.ckpt_id_1024, torch_dtype=torch.float16)
        pipe.enable_model_cpu_offload()
        prompt = self.prompt

-        image = pipe(prompt, generator=generator, num_inference_steps=2, output_type="np").images
+        image = pipe(prompt, generator=generator, output_type="np").images

        image_slice = image[0, -3:, -3:, -1]
-        expected_slice = np.array([0.0742, 0.0835, 0.2114, 0.0295, 0.0784, 0.2361, 0.1738, 0.2251, 0.3589])

-        max_diff = numpy_cosine_similarity_distance(image_slice.flatten(), expected_slice)
-        self.assertLessEqual(max_diff, 1e-4)
+        expected_slice = np.array([0.1941, 0.2117, 0.2188, 0.1946, 0.218, 0.2124, 0.199, 0.2437, 0.2583])
+
+        max_diff = np.abs(image_slice.flatten() - expected_slice).max()
+        self.assertLessEqual(max_diff, 1e-3)

    def test_pixart_512(self):
-        generator = torch.Generator("cpu").manual_seed(0)
+        generator = torch.manual_seed(0)

        pipe = PixArtAlphaPipeline.from_pretrained(self.ckpt_id_512, torch_dtype=torch.float16)
        pipe.enable_model_cpu_offload()

        prompt = self.prompt

-        image = pipe(prompt, generator=generator, num_inference_steps=2, output_type="np").images
+        image = pipe(prompt, generator=generator, output_type="np").images

        image_slice = image[0, -3:, -3:, -1]
-        expected_slice = np.array([0.3477, 0.3882, 0.4541, 0.3413, 0.3821, 0.4463, 0.4001, 0.4409, 0.4958])

-        max_diff = numpy_cosine_similarity_distance(image_slice.flatten(), expected_slice)
-        self.assertLessEqual(max_diff, 1e-4)
+        expected_slice = np.array([0.2637, 0.291, 0.2939, 0.207, 0.2512, 0.2783, 0.2168, 0.2324, 0.2817])
+
+        max_diff = np.abs(image_slice.flatten() - expected_slice).max()
+        self.assertLessEqual(max_diff, 1e-3)

    def test_pixart_1024_without_resolution_binning(self):
        generator = torch.manual_seed(0)
@@ -376,7 +372,7 @@ class PixArtAlphaPipelineIntegrationTests(unittest.TestCase):

        prompt = self.prompt
        height, width = 1024, 768
-        num_inference_steps = 2
+        num_inference_steps = 10

        image = pipe(
            prompt,
@@ -410,7 +406,7 @@ class PixArtAlphaPipelineIntegrationTests(unittest.TestCase):

        prompt = self.prompt
        height, width = 512, 768
-        num_inference_steps = 2
+        num_inference_steps = 10

        image = pipe(
            prompt,
Author	SHA1	Message	Date
Dhruv Nair	57afaff270	update	2024-02-13 03:47:00 +00:00
Dhruv Nair	e7b2032082	update	2024-02-12 18:22:36 +00:00
Dhruv Nair	2d5e9c2e39	update	2024-02-12 16:10:28 +00:00
Dhruv Nair	d68635f950	update	2024-02-12 14:24:24 +00:00