Compare commits

..

1 Commits

Author SHA1 Message Date
sayakpaul a31df720fe add note about serialization 2024-01-30 17:38:52 +05:30
23 changed files with 50 additions and 99 deletions
+3 -3
View File
@@ -24,9 +24,9 @@ ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
python3 -m pip install --no-cache-dir \
torch==2.1.2 \
torchvision==0.16.2 \
torchaudio==2.1.2 \
torch \
torchvision \
torchaudio \
onnxruntime \
--extra-index-url https://download.pytorch.org/whl/cpu && \
python3 -m pip install --no-cache-dir \
+3 -3
View File
@@ -24,9 +24,9 @@ ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
python3 -m pip install --no-cache-dir \
torch==2.1.2 \
torchvision==0.16.2 \
torchaudio==2.1.2 \
torch \
torchvision \
torchaudio \
"onnxruntime-gpu>=1.13.1" \
--extra-index-url https://download.pytorch.org/whl/cu117 && \
python3 -m pip install --no-cache-dir \
@@ -26,9 +26,9 @@ ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3.9 -m pip install --no-cache-dir --upgrade pip && \
python3.9 -m pip install --no-cache-dir \
torch==2.1.2 \
torchvision==0.16.2 \
torchaudio==2.1.2 \
torch \
torchvision \
torchaudio \
invisible_watermark && \
python3.9 -m pip install --no-cache-dir \
accelerate \
@@ -40,6 +40,6 @@ RUN python3.9 -m pip install --no-cache-dir --upgrade pip && \
numpy \
scipy \
tensorboard \
transformers
transformers
CMD ["/bin/bash"]
+3 -3
View File
@@ -25,9 +25,9 @@ ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
python3 -m pip install --no-cache-dir \
torch==2.1.2 \
torchvision==0.16.2 \
torchaudio==2.1.2 \
torch \
torchvision \
torchaudio \
invisible_watermark \
--extra-index-url https://download.pytorch.org/whl/cpu && \
python3 -m pip install --no-cache-dir \
+3 -3
View File
@@ -25,9 +25,9 @@ ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
python3 -m pip install --no-cache-dir \
torch==2.1.2 \
torchvision==0.16.2 \
torchaudio==2.1.2 \
torch \
torchvision \
torchaudio \
invisible_watermark && \
python3 -m pip install --no-cache-dir \
accelerate \
@@ -25,9 +25,9 @@ ENV PATH="/opt/venv/bin:$PATH"
# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
python3 -m pip install --no-cache-dir \
torch==2.1.2 \
torchvision==0.16.2 \
torchaudio==2.1.2 \
torch \
torchvision \
torchaudio \
invisible_watermark && \
python3 -m pip install --no-cache-dir \
accelerate \
@@ -299,15 +299,9 @@ def parse_args(input_args=None):
parser.add_argument(
"--beta_dpo",
type=int,
default=2500,
default=5000,
help="DPO KL Divergence penalty.",
)
parser.add_argument(
"--loss_type",
type=str,
default="sigmoid",
help="DPO loss type. Can be one of 'sigmoid' (default), 'ipo', or 'cpo'",
)
parser.add_argument(
"--learning_rate",
type=float,
@@ -864,19 +858,12 @@ def main(args):
accelerator.unwrap_model(unet).enable_adapters()
# Final loss.
logits = ref_diff - model_diff
if args.loss_type == "sigmoid":
loss = -1 * F.logsigmoid(args.beta_dpo * logits).mean()
elif args.loss_type == "hinge":
loss = torch.relu(1 - args.beta_dpo * logits).mean()
elif args.loss_type == "ipo":
losses = (logits - 1 / (2 * args.beta)) ** 2
loss = losses.mean()
else:
raise ValueError(f"Unknown loss type {args.loss_type}")
scale_term = -0.5 * args.beta_dpo
inside_term = scale_term * (model_diff - ref_diff)
loss = -1 * F.logsigmoid(inside_term.mean())
implicit_acc = (logits > 0).sum().float() / logits.size(0)
implicit_acc += 0.5 * (logits == 0).sum().float() / logits.size(0)
implicit_acc = (inside_term > 0).sum().float() / inside_term.size(0)
implicit_acc += 0.5 * (inside_term == 0).sum().float() / inside_term.size(0)
accelerator.backward(loss)
if accelerator.sync_gradients:
@@ -975,7 +975,7 @@ def main(args):
# Final loss.
scale_term = -0.5 * args.beta_dpo
inside_term = scale_term * (model_diff - ref_diff)
loss = -1 * F.logsigmoid(inside_term).mean()
loss = -1 * F.logsigmoid(inside_term.mean())
implicit_acc = (inside_term > 0).sum().float() / inside_term.size(0)
implicit_acc += 0.5 * (inside_term == 0).sum().float() / inside_term.size(0)
+2 -2
View File
@@ -126,8 +126,8 @@ _deps = [
"regex!=2019.12.17",
"requests",
"tensorboard",
"torch>=1.4,<2.2.0",
"torchvision<0.17",
"torch>=1.4",
"torchvision",
"transformers>=4.25.1",
"urllib3<=2.0.0",
]
+2 -2
View File
@@ -38,8 +38,8 @@ deps = {
"regex": "regex!=2019.12.17",
"requests": "requests",
"tensorboard": "tensorboard",
"torch": "torch>=1.4,<2.2.0",
"torchvision": "torchvision<0.17",
"torch": "torch>=1.4",
"torchvision": "torchvision",
"transformers": "transformers>=4.25.1",
"urllib3": "urllib3<=2.0.0",
}
+1 -2
View File
@@ -61,9 +61,8 @@ def build_sub_model_components(
return unet_components
if component_name == "vae":
scaling_factor = kwargs.get("scaling_factor", None)
vae_components = create_diffusers_vae_model_from_ldm(
pipeline_class_name, original_config, checkpoint, image_size, scaling_factor
pipeline_class_name, original_config, checkpoint, image_size
)
return vae_components
+1 -2
View File
@@ -513,12 +513,11 @@ def create_controlnet_diffusers_config(original_config, image_size: int):
return controlnet_config
def create_vae_diffusers_config(original_config, image_size, scaling_factor=None):
def create_vae_diffusers_config(original_config, image_size, scaling_factor=0.18125):
"""
Creates a config for the diffusers based on the config of the LDM model.
"""
vae_params = original_config["model"]["params"]["first_stage_config"]["params"]["ddconfig"]
scaling_factor = scaling_factor or original_config["model"]["params"]["scale_factor"]
block_out_channels = [vae_params["ch"] * mult for mult in vae_params["ch_mult"]]
down_block_types = ["DownEncoderBlock2D"] * len(block_out_channels)
+2 -2
View File
@@ -384,9 +384,9 @@ class ResnetBlock2D(nn.Module):
raise ValueError(
f" `temb` should not be None when `time_embedding_norm` is {self.time_embedding_norm}"
)
time_scale, time_shift = torch.chunk(temb, 2, dim=1)
scale, shift = torch.chunk(temb, 2, dim=1)
hidden_states = self.norm2(hidden_states)
hidden_states = hidden_states * (1 + time_scale) + time_shift
hidden_states = hidden_states * (1 + scale) + shift
else:
hidden_states = self.norm2(hidden_states)
+2 -6
View File
@@ -477,10 +477,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
timesteps: torch.IntTensor,
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
timesteps = timesteps.to(original_samples.device)
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
@@ -501,8 +498,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as sample
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
timesteps = timesteps.to(sample.device)
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
@@ -602,10 +602,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
timesteps: torch.IntTensor,
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
timesteps = timesteps.to(original_samples.device)
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
@@ -626,8 +623,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as sample
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
timesteps = timesteps.to(sample.device)
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+2 -6
View File
@@ -503,10 +503,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
timesteps: torch.IntTensor,
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
timesteps = timesteps.to(original_samples.device)
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
@@ -526,8 +523,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as sample
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
timesteps = timesteps.to(sample.device)
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
@@ -594,10 +594,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
timesteps: torch.IntTensor,
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
timesteps = timesteps.to(original_samples.device)
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
@@ -618,8 +615,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as sample
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
timesteps = timesteps.to(sample.device)
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+2 -6
View File
@@ -575,10 +575,7 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
timesteps: torch.IntTensor,
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
timesteps = timesteps.to(original_samples.device)
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
@@ -599,8 +596,7 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as sample
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
timesteps = timesteps.to(sample.device)
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+1 -4
View File
@@ -455,10 +455,7 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
timesteps: torch.IntTensor,
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
timesteps = timesteps.to(original_samples.device)
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
@@ -1069,10 +1069,7 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
timesteps: torch.IntTensor,
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
timesteps = timesteps.to(original_samples.device)
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
@@ -332,10 +332,7 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin):
timesteps: torch.IntTensor,
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls
self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
timesteps = timesteps.to(original_samples.device)
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+1 -4
View File
@@ -125,10 +125,7 @@ def export_to_video(
if output_video_path is None:
output_video_path = tempfile.NamedTemporaryFile(suffix=".mp4").name
if isinstance(video_frames[0], np.ndarray):
video_frames = [(frame * 255).astype(np.uint8) for frame in video_frames]
elif isinstance(video_frames[0], PIL.Image.Image):
if isinstance(video_frames[0], PIL.Image.Image):
video_frames = [np.array(frame) for frame in video_frames]
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
@@ -31,7 +31,6 @@ from diffusers import (
from diffusers.utils.testing_utils import (
enable_full_determinism,
floats_tensor,
is_flaky,
load_image,
load_numpy,
require_torch_gpu,
@@ -245,9 +244,8 @@ class KandinskyV22InpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCas
def test_float16_inference(self):
super().test_float16_inference(expected_max_diff=5e-1)
@is_flaky()
def test_model_cpu_offload_forward_pass(self):
super().test_inference_batch_single_identical(expected_max_diff=8e-4)
super().test_inference_batch_single_identical(expected_max_diff=5e-4)
def test_save_load_optional_components(self):
super().test_save_load_optional_components(expected_max_difference=5e-4)