[scheduler] fix a bug in add_noise (#7386)
* fix * fix * add a tests * fix --------- Co-authored-by: Sayak Paul <spsayakpaul@gmail.com> Co-authored-by: yiyixuxu <yixu310@gmail,com>
This commit is contained in:
-3
@@ -528,15 +528,12 @@ class StableDiffusionInpaintPipelineLegacy(
|
|||||||
f" {negative_prompt_embeds.shape}."
|
f" {negative_prompt_embeds.shape}."
|
||||||
)
|
)
|
||||||
|
|
||||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
|
|
||||||
def get_timesteps(self, num_inference_steps, strength, device):
|
def get_timesteps(self, num_inference_steps, strength, device):
|
||||||
# get the original timestep using init_timestep
|
# get the original timestep using init_timestep
|
||||||
init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
|
init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
|
||||||
|
|
||||||
t_start = max(num_inference_steps - init_timestep, 0)
|
t_start = max(num_inference_steps - init_timestep, 0)
|
||||||
timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
|
timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
|
||||||
if hasattr(self.scheduler, "set_begin_index"):
|
|
||||||
self.scheduler.set_begin_index(t_start * self.scheduler.order)
|
|
||||||
|
|
||||||
return timesteps, num_inference_steps - t_start
|
return timesteps, num_inference_steps - t_start
|
||||||
|
|
||||||
|
|||||||
-3
@@ -716,15 +716,12 @@ class StableDiffusionDiffEditPipeline(
|
|||||||
f" `source_negative_prompt_embeds` {source_negative_prompt_embeds.shape}."
|
f" `source_negative_prompt_embeds` {source_negative_prompt_embeds.shape}."
|
||||||
)
|
)
|
||||||
|
|
||||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
|
|
||||||
def get_timesteps(self, num_inference_steps, strength, device):
|
def get_timesteps(self, num_inference_steps, strength, device):
|
||||||
# get the original timestep using init_timestep
|
# get the original timestep using init_timestep
|
||||||
init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
|
init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
|
||||||
|
|
||||||
t_start = max(num_inference_steps - init_timestep, 0)
|
t_start = max(num_inference_steps - init_timestep, 0)
|
||||||
timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
|
timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
|
||||||
if hasattr(self.scheduler, "set_begin_index"):
|
|
||||||
self.scheduler.set_begin_index(t_start * self.scheduler.order)
|
|
||||||
|
|
||||||
return timesteps, num_inference_steps - t_start
|
return timesteps, num_inference_steps - t_start
|
||||||
|
|
||||||
|
|||||||
@@ -434,7 +434,11 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
|
|||||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||||
if self.begin_index is None:
|
if self.begin_index is None:
|
||||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||||
|
elif self.step_index is not None:
|
||||||
|
# add_noise is called after first denoising step (for inpainting)
|
||||||
|
step_indices = [self.step_index] * timesteps.shape[0]
|
||||||
else:
|
else:
|
||||||
|
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||||
|
|
||||||
sigma = sigmas[step_indices].flatten()
|
sigma = sigmas[step_indices].flatten()
|
||||||
|
|||||||
@@ -768,10 +768,14 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|||||||
schedule_timesteps = self.timesteps.to(original_samples.device)
|
schedule_timesteps = self.timesteps.to(original_samples.device)
|
||||||
timesteps = timesteps.to(original_samples.device)
|
timesteps = timesteps.to(original_samples.device)
|
||||||
|
|
||||||
# begin_index is None when the scheduler is used for training
|
# begin_index is None when the scheduler is used for training or pipeline does not implement set_begin_index
|
||||||
if self.begin_index is None:
|
if self.begin_index is None:
|
||||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||||
|
elif self.step_index is not None:
|
||||||
|
# add_noise is called after first denoising step (for inpainting)
|
||||||
|
step_indices = [self.step_index] * timesteps.shape[0]
|
||||||
else:
|
else:
|
||||||
|
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||||
|
|
||||||
sigma = sigmas[step_indices].flatten()
|
sigma = sigmas[step_indices].flatten()
|
||||||
|
|||||||
@@ -1011,10 +1011,14 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|||||||
schedule_timesteps = self.timesteps.to(original_samples.device)
|
schedule_timesteps = self.timesteps.to(original_samples.device)
|
||||||
timesteps = timesteps.to(original_samples.device)
|
timesteps = timesteps.to(original_samples.device)
|
||||||
|
|
||||||
# begin_index is None when the scheduler is used for training
|
# begin_index is None when the scheduler is used for training or pipeline does not implement set_begin_index
|
||||||
if self.begin_index is None:
|
if self.begin_index is None:
|
||||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||||
|
elif self.step_index is not None:
|
||||||
|
# add_noise is called after first denoising step (for inpainting)
|
||||||
|
step_indices = [self.step_index] * timesteps.shape[0]
|
||||||
else:
|
else:
|
||||||
|
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||||
|
|
||||||
sigma = sigmas[step_indices].flatten()
|
sigma = sigmas[step_indices].flatten()
|
||||||
|
|||||||
@@ -543,7 +543,11 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
|
|||||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||||
if self.begin_index is None:
|
if self.begin_index is None:
|
||||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||||
|
elif self.step_index is not None:
|
||||||
|
# add_noise is called after first denoising step (for inpainting)
|
||||||
|
step_indices = [self.step_index] * timesteps.shape[0]
|
||||||
else:
|
else:
|
||||||
|
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||||
|
|
||||||
sigma = sigmas[step_indices].flatten()
|
sigma = sigmas[step_indices].flatten()
|
||||||
|
|||||||
@@ -961,10 +961,14 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
|
|||||||
schedule_timesteps = self.timesteps.to(original_samples.device)
|
schedule_timesteps = self.timesteps.to(original_samples.device)
|
||||||
timesteps = timesteps.to(original_samples.device)
|
timesteps = timesteps.to(original_samples.device)
|
||||||
|
|
||||||
# begin_index is None when the scheduler is used for training
|
# begin_index is None when the scheduler is used for training or pipeline does not implement set_begin_index
|
||||||
if self.begin_index is None:
|
if self.begin_index is None:
|
||||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||||
|
elif self.step_index is not None:
|
||||||
|
# add_noise is called after first denoising step (for inpainting)
|
||||||
|
step_indices = [self.step_index] * timesteps.shape[0]
|
||||||
else:
|
else:
|
||||||
|
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||||
|
|
||||||
sigma = sigmas[step_indices].flatten()
|
sigma = sigmas[step_indices].flatten()
|
||||||
|
|||||||
@@ -669,7 +669,11 @@ class EDMDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|||||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||||
if self.begin_index is None:
|
if self.begin_index is None:
|
||||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||||
|
elif self.step_index is not None:
|
||||||
|
# add_noise is called after first denoising step (for inpainting)
|
||||||
|
step_indices = [self.step_index] * timesteps.shape[0]
|
||||||
else:
|
else:
|
||||||
|
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||||
|
|
||||||
sigma = sigmas[step_indices].flatten()
|
sigma = sigmas[step_indices].flatten()
|
||||||
|
|||||||
@@ -367,7 +367,11 @@ class EDMEulerScheduler(SchedulerMixin, ConfigMixin):
|
|||||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||||
if self.begin_index is None:
|
if self.begin_index is None:
|
||||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||||
|
elif self.step_index is not None:
|
||||||
|
# add_noise is called after first denoising step (for inpainting)
|
||||||
|
step_indices = [self.step_index] * timesteps.shape[0]
|
||||||
else:
|
else:
|
||||||
|
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||||
|
|
||||||
sigma = sigmas[step_indices].flatten()
|
sigma = sigmas[step_indices].flatten()
|
||||||
|
|||||||
@@ -467,7 +467,11 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|||||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||||
if self.begin_index is None:
|
if self.begin_index is None:
|
||||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||||
|
elif self.step_index is not None:
|
||||||
|
# add_noise is called after first denoising step (for inpainting)
|
||||||
|
step_indices = [self.step_index] * timesteps.shape[0]
|
||||||
else:
|
else:
|
||||||
|
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||||
|
|
||||||
sigma = sigmas[step_indices].flatten()
|
sigma = sigmas[step_indices].flatten()
|
||||||
|
|||||||
@@ -562,7 +562,11 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|||||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||||
if self.begin_index is None:
|
if self.begin_index is None:
|
||||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||||
|
elif self.step_index is not None:
|
||||||
|
# add_noise is called after first denoising step (for inpainting)
|
||||||
|
step_indices = [self.step_index] * timesteps.shape[0]
|
||||||
else:
|
else:
|
||||||
|
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||||
|
|
||||||
sigma = sigmas[step_indices].flatten()
|
sigma = sigmas[step_indices].flatten()
|
||||||
|
|||||||
@@ -468,7 +468,11 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|||||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||||
if self.begin_index is None:
|
if self.begin_index is None:
|
||||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||||
|
elif self.step_index is not None:
|
||||||
|
# add_noise is called after first denoising step (for inpainting)
|
||||||
|
step_indices = [self.step_index] * timesteps.shape[0]
|
||||||
else:
|
else:
|
||||||
|
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||||
|
|
||||||
sigma = sigmas[step_indices].flatten()
|
sigma = sigmas[step_indices].flatten()
|
||||||
|
|||||||
@@ -494,7 +494,11 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|||||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||||
if self.begin_index is None:
|
if self.begin_index is None:
|
||||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||||
|
elif self.step_index is not None:
|
||||||
|
# add_noise is called after first denoising step (for inpainting)
|
||||||
|
step_indices = [self.step_index] * timesteps.shape[0]
|
||||||
else:
|
else:
|
||||||
|
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||||
|
|
||||||
sigma = sigmas[step_indices].flatten()
|
sigma = sigmas[step_indices].flatten()
|
||||||
|
|||||||
@@ -469,7 +469,11 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|||||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||||
if self.begin_index is None:
|
if self.begin_index is None:
|
||||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||||
|
elif self.step_index is not None:
|
||||||
|
# add_noise is called after first denoising step (for inpainting)
|
||||||
|
step_indices = [self.step_index] * timesteps.shape[0]
|
||||||
else:
|
else:
|
||||||
|
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||||
|
|
||||||
sigma = sigmas[step_indices].flatten()
|
sigma = sigmas[step_indices].flatten()
|
||||||
|
|||||||
@@ -461,7 +461,11 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|||||||
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
# self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
|
||||||
if self.begin_index is None:
|
if self.begin_index is None:
|
||||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||||
|
elif self.step_index is not None:
|
||||||
|
# add_noise is called after first denoising step (for inpainting)
|
||||||
|
step_indices = [self.step_index] * timesteps.shape[0]
|
||||||
else:
|
else:
|
||||||
|
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||||
|
|
||||||
sigma = sigmas[step_indices].flatten()
|
sigma = sigmas[step_indices].flatten()
|
||||||
|
|||||||
@@ -862,10 +862,14 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|||||||
schedule_timesteps = self.timesteps.to(original_samples.device)
|
schedule_timesteps = self.timesteps.to(original_samples.device)
|
||||||
timesteps = timesteps.to(original_samples.device)
|
timesteps = timesteps.to(original_samples.device)
|
||||||
|
|
||||||
# begin_index is None when the scheduler is used for training
|
# begin_index is None when the scheduler is used for training or pipeline does not implement set_begin_index
|
||||||
if self.begin_index is None:
|
if self.begin_index is None:
|
||||||
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
|
||||||
|
elif self.step_index is not None:
|
||||||
|
# add_noise is called after first denoising step (for inpainting)
|
||||||
|
step_indices = [self.step_index] * timesteps.shape[0]
|
||||||
else:
|
else:
|
||||||
|
# add noise is called bevore first denoising step to create inital latent(img2img)
|
||||||
step_indices = [self.begin_index] * timesteps.shape[0]
|
step_indices = [self.begin_index] * timesteps.shape[0]
|
||||||
|
|
||||||
sigma = sigmas[step_indices].flatten()
|
sigma = sigmas[step_indices].flatten()
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ from diffusers import (
|
|||||||
AutoencoderKL,
|
AutoencoderKL,
|
||||||
DDIMScheduler,
|
DDIMScheduler,
|
||||||
DPMSolverMultistepScheduler,
|
DPMSolverMultistepScheduler,
|
||||||
|
EulerAncestralDiscreteScheduler,
|
||||||
LCMScheduler,
|
LCMScheduler,
|
||||||
LMSDiscreteScheduler,
|
LMSDiscreteScheduler,
|
||||||
PNDMScheduler,
|
PNDMScheduler,
|
||||||
@@ -557,6 +558,29 @@ class StableDiffusionSimpleInpaintPipelineFastTests(StableDiffusionInpaintPipeli
|
|||||||
image_slice2 = images[1, -3:, -3:, -1]
|
image_slice2 = images[1, -3:, -3:, -1]
|
||||||
assert np.abs(image_slice1.flatten() - image_slice2.flatten()).max() > 1e-2
|
assert np.abs(image_slice1.flatten() - image_slice2.flatten()).max() > 1e-2
|
||||||
|
|
||||||
|
def test_stable_diffusion_inpaint_euler(self):
|
||||||
|
device = "cpu" # ensure determinism for the device-dependent torch.Generator
|
||||||
|
components = self.get_dummy_components(time_cond_proj_dim=256)
|
||||||
|
sd_pipe = StableDiffusionInpaintPipeline(**components)
|
||||||
|
sd_pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(sd_pipe.scheduler.config)
|
||||||
|
sd_pipe = sd_pipe.to(device)
|
||||||
|
sd_pipe.set_progress_bar_config(disable=None)
|
||||||
|
|
||||||
|
inputs = self.get_dummy_inputs(device, output_pil=False)
|
||||||
|
half_dim = inputs["image"].shape[2] // 2
|
||||||
|
inputs["mask_image"][0, 0, :half_dim, :half_dim] = 0
|
||||||
|
|
||||||
|
inputs["num_inference_steps"] = 4
|
||||||
|
image = sd_pipe(**inputs).images
|
||||||
|
image_slice = image[0, -3:, -3:, -1]
|
||||||
|
|
||||||
|
assert image.shape == (1, 64, 64, 3)
|
||||||
|
|
||||||
|
expected_slice = np.array(
|
||||||
|
[[0.6387283, 0.5564158, 0.58631873, 0.5539942, 0.5494673, 0.6461868, 0.5251618, 0.5497595, 0.5508756]]
|
||||||
|
)
|
||||||
|
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-4
|
||||||
|
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
@require_torch_gpu
|
@require_torch_gpu
|
||||||
|
|||||||
Reference in New Issue
Block a user