Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d3dd3ae933 | |||
| 97ee616971 | |||
| 0fc62d1702 | |||
| f4d3f913f4 | |||
| 1cab64b3be | |||
| 8d7dc85312 | |||
| 7a3548220a | |||
| 87a92f779c | |||
| 0db766ba77 | |||
| 8e94663503 |
@@ -24,9 +24,9 @@ ENV PATH="/opt/venv/bin:$PATH"
|
||||
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
|
||||
python3 -m pip install --no-cache-dir \
|
||||
torch \
|
||||
torchvision \
|
||||
torchaudio \
|
||||
torch==2.1.2 \
|
||||
torchvision==0.16.2 \
|
||||
torchaudio==2.1.2 \
|
||||
onnxruntime \
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu && \
|
||||
python3 -m pip install --no-cache-dir \
|
||||
|
||||
@@ -24,9 +24,9 @@ ENV PATH="/opt/venv/bin:$PATH"
|
||||
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
|
||||
python3 -m pip install --no-cache-dir \
|
||||
torch \
|
||||
torchvision \
|
||||
torchaudio \
|
||||
torch==2.1.2 \
|
||||
torchvision==0.16.2 \
|
||||
torchaudio==2.1.2 \
|
||||
"onnxruntime-gpu>=1.13.1" \
|
||||
--extra-index-url https://download.pytorch.org/whl/cu117 && \
|
||||
python3 -m pip install --no-cache-dir \
|
||||
|
||||
@@ -26,9 +26,9 @@ ENV PATH="/opt/venv/bin:$PATH"
|
||||
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
|
||||
RUN python3.9 -m pip install --no-cache-dir --upgrade pip && \
|
||||
python3.9 -m pip install --no-cache-dir \
|
||||
torch \
|
||||
torchvision \
|
||||
torchaudio \
|
||||
torch==2.1.2 \
|
||||
torchvision==0.16.2 \
|
||||
torchaudio==2.1.2 \
|
||||
invisible_watermark && \
|
||||
python3.9 -m pip install --no-cache-dir \
|
||||
accelerate \
|
||||
@@ -40,6 +40,6 @@ RUN python3.9 -m pip install --no-cache-dir --upgrade pip && \
|
||||
numpy \
|
||||
scipy \
|
||||
tensorboard \
|
||||
transformers
|
||||
|
||||
transformers
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
|
||||
@@ -25,9 +25,9 @@ ENV PATH="/opt/venv/bin:$PATH"
|
||||
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
|
||||
python3 -m pip install --no-cache-dir \
|
||||
torch \
|
||||
torchvision \
|
||||
torchaudio \
|
||||
torch==2.1.2 \
|
||||
torchvision==0.16.2 \
|
||||
torchaudio==2.1.2 \
|
||||
invisible_watermark \
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu && \
|
||||
python3 -m pip install --no-cache-dir \
|
||||
|
||||
@@ -25,9 +25,9 @@ ENV PATH="/opt/venv/bin:$PATH"
|
||||
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
|
||||
python3 -m pip install --no-cache-dir \
|
||||
torch \
|
||||
torchvision \
|
||||
torchaudio \
|
||||
torch==2.1.2 \
|
||||
torchvision==0.16.2 \
|
||||
torchaudio==2.1.2 \
|
||||
invisible_watermark && \
|
||||
python3 -m pip install --no-cache-dir \
|
||||
accelerate \
|
||||
|
||||
@@ -25,9 +25,9 @@ ENV PATH="/opt/venv/bin:$PATH"
|
||||
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
|
||||
python3 -m pip install --no-cache-dir \
|
||||
torch \
|
||||
torchvision \
|
||||
torchaudio \
|
||||
torch==2.1.2 \
|
||||
torchvision==0.16.2 \
|
||||
torchaudio==2.1.2 \
|
||||
invisible_watermark && \
|
||||
python3 -m pip install --no-cache-dir \
|
||||
accelerate \
|
||||
|
||||
@@ -299,9 +299,15 @@ def parse_args(input_args=None):
|
||||
parser.add_argument(
|
||||
"--beta_dpo",
|
||||
type=int,
|
||||
default=5000,
|
||||
default=2500,
|
||||
help="DPO KL Divergence penalty.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--loss_type",
|
||||
type=str,
|
||||
default="sigmoid",
|
||||
help="DPO loss type. Can be one of 'sigmoid' (default), 'ipo', or 'cpo'",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--learning_rate",
|
||||
type=float,
|
||||
@@ -858,12 +864,19 @@ def main(args):
|
||||
accelerator.unwrap_model(unet).enable_adapters()
|
||||
|
||||
# Final loss.
|
||||
scale_term = -0.5 * args.beta_dpo
|
||||
inside_term = scale_term * (model_diff - ref_diff)
|
||||
loss = -1 * F.logsigmoid(inside_term.mean())
|
||||
logits = ref_diff - model_diff
|
||||
if args.loss_type == "sigmoid":
|
||||
loss = -1 * F.logsigmoid(args.beta_dpo * logits).mean()
|
||||
elif args.loss_type == "hinge":
|
||||
loss = torch.relu(1 - args.beta_dpo * logits).mean()
|
||||
elif args.loss_type == "ipo":
|
||||
losses = (logits - 1 / (2 * args.beta)) ** 2
|
||||
loss = losses.mean()
|
||||
else:
|
||||
raise ValueError(f"Unknown loss type {args.loss_type}")
|
||||
|
||||
implicit_acc = (inside_term > 0).sum().float() / inside_term.size(0)
|
||||
implicit_acc += 0.5 * (inside_term == 0).sum().float() / inside_term.size(0)
|
||||
implicit_acc = (logits > 0).sum().float() / logits.size(0)
|
||||
implicit_acc += 0.5 * (logits == 0).sum().float() / logits.size(0)
|
||||
|
||||
accelerator.backward(loss)
|
||||
if accelerator.sync_gradients:
|
||||
|
||||
@@ -975,7 +975,7 @@ def main(args):
|
||||
# Final loss.
|
||||
scale_term = -0.5 * args.beta_dpo
|
||||
inside_term = scale_term * (model_diff - ref_diff)
|
||||
loss = -1 * F.logsigmoid(inside_term.mean())
|
||||
loss = -1 * F.logsigmoid(inside_term).mean()
|
||||
|
||||
implicit_acc = (inside_term > 0).sum().float() / inside_term.size(0)
|
||||
implicit_acc += 0.5 * (inside_term == 0).sum().float() / inside_term.size(0)
|
||||
|
||||
@@ -126,8 +126,8 @@ _deps = [
|
||||
"regex!=2019.12.17",
|
||||
"requests",
|
||||
"tensorboard",
|
||||
"torch>=1.4",
|
||||
"torchvision",
|
||||
"torch>=1.4,<2.2.0",
|
||||
"torchvision<0.17",
|
||||
"transformers>=4.25.1",
|
||||
"urllib3<=2.0.0",
|
||||
]
|
||||
|
||||
@@ -38,8 +38,8 @@ deps = {
|
||||
"regex": "regex!=2019.12.17",
|
||||
"requests": "requests",
|
||||
"tensorboard": "tensorboard",
|
||||
"torch": "torch>=1.4",
|
||||
"torchvision": "torchvision",
|
||||
"torch": "torch>=1.4,<2.2.0",
|
||||
"torchvision": "torchvision<0.17",
|
||||
"transformers": "transformers>=4.25.1",
|
||||
"urllib3": "urllib3<=2.0.0",
|
||||
}
|
||||
|
||||
@@ -61,8 +61,9 @@ def build_sub_model_components(
|
||||
return unet_components
|
||||
|
||||
if component_name == "vae":
|
||||
scaling_factor = kwargs.get("scaling_factor", None)
|
||||
vae_components = create_diffusers_vae_model_from_ldm(
|
||||
pipeline_class_name, original_config, checkpoint, image_size
|
||||
pipeline_class_name, original_config, checkpoint, image_size, scaling_factor
|
||||
)
|
||||
return vae_components
|
||||
|
||||
|
||||
@@ -513,11 +513,12 @@ def create_controlnet_diffusers_config(original_config, image_size: int):
|
||||
return controlnet_config
|
||||
|
||||
|
||||
def create_vae_diffusers_config(original_config, image_size, scaling_factor=0.18125):
|
||||
def create_vae_diffusers_config(original_config, image_size, scaling_factor=None):
|
||||
"""
|
||||
Creates a config for the diffusers based on the config of the LDM model.
|
||||
"""
|
||||
vae_params = original_config["model"]["params"]["first_stage_config"]["params"]["ddconfig"]
|
||||
scaling_factor = scaling_factor or original_config["model"]["params"]["scale_factor"]
|
||||
|
||||
block_out_channels = [vae_params["ch"] * mult for mult in vae_params["ch_mult"]]
|
||||
down_block_types = ["DownEncoderBlock2D"] * len(block_out_channels)
|
||||
|
||||
@@ -384,9 +384,9 @@ class ResnetBlock2D(nn.Module):
|
||||
raise ValueError(
|
||||
f" `temb` should not be None when `time_embedding_norm` is {self.time_embedding_norm}"
|
||||
)
|
||||
scale, shift = torch.chunk(temb, 2, dim=1)
|
||||
time_scale, time_shift = torch.chunk(temb, 2, dim=1)
|
||||
hidden_states = self.norm2(hidden_states)
|
||||
hidden_states = hidden_states * (1 + scale) + shift
|
||||
hidden_states = hidden_states * (1 + time_scale) + time_shift
|
||||
else:
|
||||
hidden_states = self.norm2(hidden_states)
|
||||
|
||||
|
||||
@@ -477,7 +477,10 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
timesteps: torch.IntTensor,
|
||||
) -> torch.FloatTensor:
|
||||
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
|
||||
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
|
||||
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
|
||||
# for the subsequent add_noise calls
|
||||
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
|
||||
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
|
||||
timesteps = timesteps.to(original_samples.device)
|
||||
|
||||
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
||||
@@ -498,7 +501,8 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
|
||||
self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
|
||||
) -> torch.FloatTensor:
|
||||
# Make sure alphas_cumprod and timestep have same device and dtype as sample
|
||||
alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
|
||||
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
|
||||
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
|
||||
timesteps = timesteps.to(sample.device)
|
||||
|
||||
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
||||
|
||||
@@ -602,7 +602,10 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
||||
timesteps: torch.IntTensor,
|
||||
) -> torch.FloatTensor:
|
||||
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
|
||||
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
|
||||
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
|
||||
# for the subsequent add_noise calls
|
||||
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
|
||||
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
|
||||
timesteps = timesteps.to(original_samples.device)
|
||||
|
||||
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
||||
@@ -623,7 +626,8 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
||||
self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
|
||||
) -> torch.FloatTensor:
|
||||
# Make sure alphas_cumprod and timestep have same device and dtype as sample
|
||||
alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
|
||||
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
|
||||
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
|
||||
timesteps = timesteps.to(sample.device)
|
||||
|
||||
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
||||
|
||||
@@ -503,7 +503,10 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
||||
timesteps: torch.IntTensor,
|
||||
) -> torch.FloatTensor:
|
||||
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
|
||||
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
|
||||
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
|
||||
# for the subsequent add_noise calls
|
||||
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
|
||||
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
|
||||
timesteps = timesteps.to(original_samples.device)
|
||||
|
||||
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
||||
@@ -523,7 +526,8 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
||||
self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
|
||||
) -> torch.FloatTensor:
|
||||
# Make sure alphas_cumprod and timestep have same device and dtype as sample
|
||||
alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
|
||||
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
|
||||
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
|
||||
timesteps = timesteps.to(sample.device)
|
||||
|
||||
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
||||
|
||||
@@ -594,7 +594,10 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
||||
timesteps: torch.IntTensor,
|
||||
) -> torch.FloatTensor:
|
||||
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
|
||||
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
|
||||
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
|
||||
# for the subsequent add_noise calls
|
||||
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
|
||||
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
|
||||
timesteps = timesteps.to(original_samples.device)
|
||||
|
||||
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
||||
@@ -615,7 +618,8 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
||||
self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
|
||||
) -> torch.FloatTensor:
|
||||
# Make sure alphas_cumprod and timestep have same device and dtype as sample
|
||||
alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
|
||||
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
|
||||
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
|
||||
timesteps = timesteps.to(sample.device)
|
||||
|
||||
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
||||
|
||||
@@ -575,7 +575,10 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
||||
timesteps: torch.IntTensor,
|
||||
) -> torch.FloatTensor:
|
||||
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
|
||||
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
|
||||
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
|
||||
# for the subsequent add_noise calls
|
||||
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
|
||||
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
|
||||
timesteps = timesteps.to(original_samples.device)
|
||||
|
||||
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
||||
@@ -596,7 +599,8 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
||||
self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
|
||||
) -> torch.FloatTensor:
|
||||
# Make sure alphas_cumprod and timestep have same device and dtype as sample
|
||||
alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
|
||||
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
|
||||
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
|
||||
timesteps = timesteps.to(sample.device)
|
||||
|
||||
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
||||
|
||||
@@ -455,7 +455,10 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
|
||||
timesteps: torch.IntTensor,
|
||||
) -> torch.FloatTensor:
|
||||
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
|
||||
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
|
||||
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
|
||||
# for the subsequent add_noise calls
|
||||
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
|
||||
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
|
||||
timesteps = timesteps.to(original_samples.device)
|
||||
|
||||
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
||||
|
||||
@@ -1069,7 +1069,10 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
|
||||
timesteps: torch.IntTensor,
|
||||
) -> torch.FloatTensor:
|
||||
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
|
||||
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
|
||||
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
|
||||
# for the subsequent add_noise calls
|
||||
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
|
||||
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
|
||||
timesteps = timesteps.to(original_samples.device)
|
||||
|
||||
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
||||
|
||||
@@ -332,7 +332,10 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin):
|
||||
timesteps: torch.IntTensor,
|
||||
) -> torch.FloatTensor:
|
||||
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
|
||||
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
|
||||
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
|
||||
# for the subsequent add_noise calls
|
||||
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
|
||||
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
|
||||
timesteps = timesteps.to(original_samples.device)
|
||||
|
||||
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
|
||||
|
||||
@@ -125,7 +125,10 @@ def export_to_video(
|
||||
if output_video_path is None:
|
||||
output_video_path = tempfile.NamedTemporaryFile(suffix=".mp4").name
|
||||
|
||||
if isinstance(video_frames[0], PIL.Image.Image):
|
||||
if isinstance(video_frames[0], np.ndarray):
|
||||
video_frames = [(frame * 255).astype(np.uint8) for frame in video_frames]
|
||||
|
||||
elif isinstance(video_frames[0], PIL.Image.Image):
|
||||
video_frames = [np.array(frame) for frame in video_frames]
|
||||
|
||||
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
|
||||
|
||||
@@ -31,6 +31,7 @@ from diffusers import (
|
||||
from diffusers.utils.testing_utils import (
|
||||
enable_full_determinism,
|
||||
floats_tensor,
|
||||
is_flaky,
|
||||
load_image,
|
||||
load_numpy,
|
||||
require_torch_gpu,
|
||||
@@ -244,8 +245,9 @@ class KandinskyV22InpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCas
|
||||
def test_float16_inference(self):
|
||||
super().test_float16_inference(expected_max_diff=5e-1)
|
||||
|
||||
@is_flaky()
|
||||
def test_model_cpu_offload_forward_pass(self):
|
||||
super().test_inference_batch_single_identical(expected_max_diff=5e-4)
|
||||
super().test_inference_batch_single_identical(expected_max_diff=8e-4)
|
||||
|
||||
def test_save_load_optional_components(self):
|
||||
super().test_save_load_optional_components(expected_max_difference=5e-4)
|
||||
|
||||
Reference in New Issue
Block a user