Sequential cpu offload: require accelerate 0.14.0 (#2517)
* Sequential cpu offload: require accelerate 0.14.0.
* Import `is_accelerate_version`.
* Missing copy.
This commit is contained in:
@@ -191,10 +191,10 @@ class AltDiffusionPipeline(DiffusionPipeline):
|
|||||||
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
||||||
`enable_model_cpu_offload`, but performance is lower.
|
`enable_model_cpu_offload`, but performance is lower.
|
||||||
"""
|
"""
|
||||||
if is_accelerate_available():
|
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||||
from accelerate import cpu_offload
|
from accelerate import cpu_offload
|
||||||
else:
|
else:
|
||||||
raise ImportError("Please install accelerate via `pip install accelerate`")
|
raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
|
||||||
|
|
||||||
device = torch.device(f"cuda:{gpu_id}")
|
device = torch.device(f"cuda:{gpu_id}")
|
||||||
|
|
||||||
|
|||||||
@@ -213,10 +213,10 @@ class AltDiffusionImg2ImgPipeline(DiffusionPipeline):
|
|||||||
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
||||||
`enable_model_cpu_offload`, but performance is lower.
|
`enable_model_cpu_offload`, but performance is lower.
|
||||||
"""
|
"""
|
||||||
if is_accelerate_available():
|
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||||
from accelerate import cpu_offload
|
from accelerate import cpu_offload
|
||||||
else:
|
else:
|
||||||
raise ImportError("Please install accelerate via `pip install accelerate`")
|
raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
|
||||||
|
|
||||||
device = torch.device(f"cuda:{gpu_id}")
|
device = torch.device(f"cuda:{gpu_id}")
|
||||||
|
|
||||||
|
|||||||
@@ -230,10 +230,10 @@ class CycleDiffusionPipeline(DiffusionPipeline):
|
|||||||
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
||||||
`enable_model_cpu_offload`, but performance is lower.
|
`enable_model_cpu_offload`, but performance is lower.
|
||||||
"""
|
"""
|
||||||
if is_accelerate_available():
|
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||||
from accelerate import cpu_offload
|
from accelerate import cpu_offload
|
||||||
else:
|
else:
|
||||||
raise ImportError("Please install accelerate via `pip install accelerate`")
|
raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
|
||||||
|
|
||||||
device = torch.device(f"cuda:{gpu_id}")
|
device = torch.device(f"cuda:{gpu_id}")
|
||||||
|
|
||||||
|
|||||||
@@ -194,10 +194,10 @@ class StableDiffusionPipeline(DiffusionPipeline):
|
|||||||
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
||||||
`enable_model_cpu_offload`, but performance is lower.
|
`enable_model_cpu_offload`, but performance is lower.
|
||||||
"""
|
"""
|
||||||
if is_accelerate_available():
|
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||||
from accelerate import cpu_offload
|
from accelerate import cpu_offload
|
||||||
else:
|
else:
|
||||||
raise ImportError("Please install accelerate via `pip install accelerate`")
|
raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
|
||||||
|
|
||||||
device = torch.device(f"cuda:{gpu_id}")
|
device = torch.device(f"cuda:{gpu_id}")
|
||||||
|
|
||||||
|
|||||||
+3
-3
@@ -24,7 +24,7 @@ from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
|
|||||||
from ...models import AutoencoderKL, UNet2DConditionModel
|
from ...models import AutoencoderKL, UNet2DConditionModel
|
||||||
from ...models.cross_attention import CrossAttention
|
from ...models.cross_attention import CrossAttention
|
||||||
from ...schedulers import KarrasDiffusionSchedulers
|
from ...schedulers import KarrasDiffusionSchedulers
|
||||||
from ...utils import is_accelerate_available, logging, randn_tensor, replace_example_docstring
|
from ...utils import is_accelerate_available, is_accelerate_version, logging, randn_tensor, replace_example_docstring
|
||||||
from ..pipeline_utils import DiffusionPipeline
|
from ..pipeline_utils import DiffusionPipeline
|
||||||
from . import StableDiffusionPipelineOutput
|
from . import StableDiffusionPipelineOutput
|
||||||
from .safety_checker import StableDiffusionSafetyChecker
|
from .safety_checker import StableDiffusionSafetyChecker
|
||||||
@@ -256,10 +256,10 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline):
|
|||||||
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
||||||
`enable_model_cpu_offload`, but performance is lower.
|
`enable_model_cpu_offload`, but performance is lower.
|
||||||
"""
|
"""
|
||||||
if is_accelerate_available():
|
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||||
from accelerate import cpu_offload
|
from accelerate import cpu_offload
|
||||||
else:
|
else:
|
||||||
raise ImportError("Please install accelerate via `pip install accelerate`")
|
raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
|
||||||
|
|
||||||
device = torch.device(f"cuda:{gpu_id}")
|
device = torch.device(f"cuda:{gpu_id}")
|
||||||
|
|
||||||
|
|||||||
@@ -218,10 +218,10 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
|
|||||||
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
||||||
`enable_model_cpu_offload`, but performance is lower.
|
`enable_model_cpu_offload`, but performance is lower.
|
||||||
"""
|
"""
|
||||||
if is_accelerate_available():
|
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||||
from accelerate import cpu_offload
|
from accelerate import cpu_offload
|
||||||
else:
|
else:
|
||||||
raise ImportError("Please install accelerate via `pip install accelerate`")
|
raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
|
||||||
|
|
||||||
device = torch.device(f"cuda:{gpu_id}")
|
device = torch.device(f"cuda:{gpu_id}")
|
||||||
|
|
||||||
|
|||||||
@@ -265,10 +265,10 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
|
|||||||
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
||||||
`enable_model_cpu_offload`, but performance is lower.
|
`enable_model_cpu_offload`, but performance is lower.
|
||||||
"""
|
"""
|
||||||
if is_accelerate_available():
|
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||||
from accelerate import cpu_offload
|
from accelerate import cpu_offload
|
||||||
else:
|
else:
|
||||||
raise ImportError("Please install accelerate via `pip install accelerate`")
|
raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
|
||||||
|
|
||||||
device = torch.device(f"cuda:{gpu_id}")
|
device = torch.device(f"cuda:{gpu_id}")
|
||||||
|
|
||||||
|
|||||||
+2
-2
@@ -209,10 +209,10 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
|
|||||||
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
||||||
`enable_model_cpu_offload`, but performance is lower.
|
`enable_model_cpu_offload`, but performance is lower.
|
||||||
"""
|
"""
|
||||||
if is_accelerate_available():
|
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||||
from accelerate import cpu_offload
|
from accelerate import cpu_offload
|
||||||
else:
|
else:
|
||||||
raise ImportError("Please install accelerate via `pip install accelerate`")
|
raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
|
||||||
|
|
||||||
device = torch.device(f"cuda:{gpu_id}")
|
device = torch.device(f"cuda:{gpu_id}")
|
||||||
|
|
||||||
|
|||||||
+2
-2
@@ -398,10 +398,10 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline):
|
|||||||
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
||||||
`enable_model_cpu_offload`, but performance is lower.
|
`enable_model_cpu_offload`, but performance is lower.
|
||||||
"""
|
"""
|
||||||
if is_accelerate_available():
|
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||||
from accelerate import cpu_offload
|
from accelerate import cpu_offload
|
||||||
else:
|
else:
|
||||||
raise ImportError("Please install accelerate via `pip install accelerate`")
|
raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
|
||||||
|
|
||||||
device = torch.device(f"cuda:{gpu_id}")
|
device = torch.device(f"cuda:{gpu_id}")
|
||||||
|
|
||||||
|
|||||||
@@ -130,10 +130,10 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline):
|
|||||||
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
||||||
`enable_model_cpu_offload`, but performance is lower.
|
`enable_model_cpu_offload`, but performance is lower.
|
||||||
"""
|
"""
|
||||||
if is_accelerate_available():
|
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||||
from accelerate import cpu_offload
|
from accelerate import cpu_offload
|
||||||
else:
|
else:
|
||||||
raise ImportError("Please install accelerate via `pip install accelerate`")
|
raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
|
||||||
|
|
||||||
device = torch.device(f"cuda:{gpu_id}")
|
device = torch.device(f"cuda:{gpu_id}")
|
||||||
|
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
|
|||||||
|
|
||||||
from ...models import AutoencoderKL, UNet2DConditionModel
|
from ...models import AutoencoderKL, UNet2DConditionModel
|
||||||
from ...schedulers import DDIMScheduler, PNDMScheduler
|
from ...schedulers import DDIMScheduler, PNDMScheduler
|
||||||
from ...utils import is_accelerate_available, logging, randn_tensor, replace_example_docstring
|
from ...utils import is_accelerate_available, is_accelerate_version, logging, randn_tensor, replace_example_docstring
|
||||||
from ..pipeline_utils import DiffusionPipeline
|
from ..pipeline_utils import DiffusionPipeline
|
||||||
from . import StableDiffusionPipelineOutput
|
from . import StableDiffusionPipelineOutput
|
||||||
from .safety_checker import StableDiffusionSafetyChecker
|
from .safety_checker import StableDiffusionSafetyChecker
|
||||||
@@ -151,10 +151,10 @@ class StableDiffusionPanoramaPipeline(DiffusionPipeline):
|
|||||||
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
||||||
`enable_model_cpu_offload`, but performance is lower.
|
`enable_model_cpu_offload`, but performance is lower.
|
||||||
"""
|
"""
|
||||||
if is_accelerate_available():
|
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||||
from accelerate import cpu_offload
|
from accelerate import cpu_offload
|
||||||
else:
|
else:
|
||||||
raise ImportError("Please install accelerate via `pip install accelerate`")
|
raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
|
||||||
|
|
||||||
device = torch.device(f"cuda:{gpu_id}")
|
device = torch.device(f"cuda:{gpu_id}")
|
||||||
|
|
||||||
|
|||||||
@@ -365,10 +365,10 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
|
|||||||
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
||||||
`enable_model_cpu_offload`, but performance is lower.
|
`enable_model_cpu_offload`, but performance is lower.
|
||||||
"""
|
"""
|
||||||
if is_accelerate_available():
|
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||||
from accelerate import cpu_offload
|
from accelerate import cpu_offload
|
||||||
else:
|
else:
|
||||||
raise ImportError("Please install accelerate via `pip install accelerate`")
|
raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
|
||||||
|
|
||||||
device = torch.device(f"cuda:{gpu_id}")
|
device = torch.device(f"cuda:{gpu_id}")
|
||||||
|
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
|
|||||||
|
|
||||||
from ...models import AutoencoderKL, UNet2DConditionModel
|
from ...models import AutoencoderKL, UNet2DConditionModel
|
||||||
from ...schedulers import KarrasDiffusionSchedulers
|
from ...schedulers import KarrasDiffusionSchedulers
|
||||||
from ...utils import is_accelerate_available, logging, randn_tensor, replace_example_docstring
|
from ...utils import is_accelerate_available, is_accelerate_version, logging, randn_tensor, replace_example_docstring
|
||||||
from ..pipeline_utils import DiffusionPipeline
|
from ..pipeline_utils import DiffusionPipeline
|
||||||
from . import StableDiffusionPipelineOutput
|
from . import StableDiffusionPipelineOutput
|
||||||
from .safety_checker import StableDiffusionSafetyChecker
|
from .safety_checker import StableDiffusionSafetyChecker
|
||||||
@@ -169,10 +169,10 @@ class StableDiffusionSAGPipeline(DiffusionPipeline):
|
|||||||
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
Note that offloading happens on a submodule basis. Memory savings are higher than with
|
||||||
`enable_model_cpu_offload`, but performance is lower.
|
`enable_model_cpu_offload`, but performance is lower.
|
||||||
"""
|
"""
|
||||||
if is_accelerate_available():
|
if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
|
||||||
from accelerate import cpu_offload
|
from accelerate import cpu_offload
|
||||||
else:
|
else:
|
||||||
raise ImportError("Please install accelerate via `pip install accelerate`")
|
raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
|
||||||
|
|
||||||
device = torch.device(f"cuda:{gpu_id}")
|
device = torch.device(f"cuda:{gpu_id}")
|
||||||
|
|
||||||
|
|||||||
@@ -39,7 +39,16 @@ from diffusers import (
|
|||||||
StableDiffusionDepth2ImgPipeline,
|
StableDiffusionDepth2ImgPipeline,
|
||||||
UNet2DConditionModel,
|
UNet2DConditionModel,
|
||||||
)
|
)
|
||||||
from diffusers.utils import floats_tensor, is_accelerate_available, load_image, load_numpy, nightly, slow, torch_device
|
from diffusers.utils import (
|
||||||
|
floats_tensor,
|
||||||
|
is_accelerate_available,
|
||||||
|
is_accelerate_version,
|
||||||
|
load_image,
|
||||||
|
load_numpy,
|
||||||
|
nightly,
|
||||||
|
slow,
|
||||||
|
torch_device,
|
||||||
|
)
|
||||||
from diffusers.utils.testing_utils import require_torch_gpu, skip_mps
|
from diffusers.utils.testing_utils import require_torch_gpu, skip_mps
|
||||||
|
|
||||||
from ...test_pipelines_common import PipelineTesterMixin
|
from ...test_pipelines_common import PipelineTesterMixin
|
||||||
@@ -227,8 +236,8 @@ class StableDiffusionDepth2ImgPipelineFastTests(PipelineTesterMixin, unittest.Te
|
|||||||
self.assertLess(max_diff, 1.3e-2, "The outputs of the fp16 and fp32 pipelines are too different.")
|
self.assertLess(max_diff, 1.3e-2, "The outputs of the fp16 and fp32 pipelines are too different.")
|
||||||
|
|
||||||
@unittest.skipIf(
|
@unittest.skipIf(
|
||||||
torch_device != "cuda" or not is_accelerate_available(),
|
torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"),
|
||||||
reason="CPU offload is only available with CUDA and `accelerate` installed",
|
reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
|
||||||
)
|
)
|
||||||
def test_cpu_offload_forward_pass(self):
|
def test_cpu_offload_forward_pass(self):
|
||||||
components = self.get_dummy_components()
|
components = self.get_dummy_components()
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ import torch
|
|||||||
import diffusers
|
import diffusers
|
||||||
from diffusers import DiffusionPipeline
|
from diffusers import DiffusionPipeline
|
||||||
from diffusers.utils import logging
|
from diffusers.utils import logging
|
||||||
from diffusers.utils.import_utils import is_accelerate_available, is_xformers_available
|
from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version, is_xformers_available
|
||||||
from diffusers.utils.testing_utils import require_torch, torch_device
|
from diffusers.utils.testing_utils import require_torch, torch_device
|
||||||
|
|
||||||
|
|
||||||
@@ -417,8 +417,8 @@ class PipelineTesterMixin:
|
|||||||
assert_mean_pixel_difference(output_with_slicing[0], output_without_slicing[0])
|
assert_mean_pixel_difference(output_with_slicing[0], output_without_slicing[0])
|
||||||
|
|
||||||
@unittest.skipIf(
|
@unittest.skipIf(
|
||||||
torch_device != "cuda" or not is_accelerate_available(),
|
torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"),
|
||||||
reason="CPU offload is only available with CUDA and `accelerate` installed",
|
reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
|
||||||
)
|
)
|
||||||
def test_cpu_offload_forward_pass(self):
|
def test_cpu_offload_forward_pass(self):
|
||||||
if not self.test_cpu_offload:
|
if not self.test_cpu_offload:
|
||||||
|
|||||||
Reference in New Issue
Block a user