Compare commits
47 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 390d0879fe | |||
| 1fb8082af9 | |||
| 54e6f2d99e | |||
| 22326b4971 | |||
| 7cc0ba0070 | |||
| e4bd3f5a05 | |||
| 25d0277911 | |||
| 14d7fe3f9e | |||
| 2f4e29d179 | |||
| b28311d9e5 | |||
| 8a1020c91e | |||
| 2483c57c44 | |||
| f18942b3e6 | |||
| 7021a35eeb | |||
| e0455537c2 | |||
| c61360099b | |||
| b585832d4e | |||
| 963c73c229 | |||
| 09c595f212 | |||
| b1c5030418 | |||
| 17e801ccb9 | |||
| 468008f6b7 | |||
| 10e7d39f58 | |||
| 49c01d4a4b | |||
| f2e6c24df2 | |||
| ce659bc586 | |||
| 615c12ab68 | |||
| e34d9f1949 | |||
| 4106e3f182 | |||
| 1651c9a1a1 | |||
| 4bfdb34b36 | |||
| 3d4f987cc9 | |||
| 1feac0469b | |||
| 75e5cd046b | |||
| 865b6638f7 | |||
| 428c952289 | |||
| d16c921346 | |||
| c46380165a | |||
| a6a89aa199 | |||
| 9e9a49ca24 | |||
| b9c90f7e22 | |||
| 19fc3281c5 | |||
| fe8f0c9a76 | |||
| dc0bef11bd | |||
| 27ef43f043 | |||
| 5d5c61bd09 | |||
| 7c38bff847 |
@@ -0,0 +1,100 @@
|
||||
name: Slow Test Memory Checks

on:
  push:
    branches: [ shm-size ]

env:
  DIFFUSERS_IS_CI: yes
  HF_HUB_ENABLE_HF_TRANSFER: 1
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  PYTEST_TIMEOUT: 600
  RUN_SLOW: yes
  PIPELINE_USAGE_CUTOFF: 50000

jobs:
  # Job 1: runs the matrix-fetching script and publishes its output so the
  # GPU job below can fan out over one entry per pipeline test module.
  setup_torch_cuda_pipeline_matrix:
    name: Setup Torch Pipelines CUDA Slow Tests Matrix
    runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
    container:
      image: diffusers/diffusers-pytorch-cpu
    outputs:
      pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: Install dependencies
        run: |
          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
          python -m uv pip install -e [quality,test]
      - name: Environment
        run: |
          python utils/print_env.py
      - name: Fetch Pipeline Matrix
        id: fetch_pipeline_matrix
        run: |
          matrix=$(python utils/fetch_torch_cuda_pipeline_test_matrix.py)
          # Quote the expansion so the JSON is echoed verbatim (no word splitting / globbing).
          echo "$matrix"
          echo "pipeline_test_matrix=$matrix" >> $GITHUB_OUTPUT
      - name: Pipeline Tests Artifacts
        if: ${{ always() }}
        # upload-artifact v1/v2 are retired; v4 requires unique artifact names per run,
        # which this workflow already satisfies.
        uses: actions/upload-artifact@v4
        with:
          name: test-pipelines.json
          path: reports

  # Job 2: runs the slow pipeline tests on a GPU runner, one matrix entry per module.
  torch_pipelines_cuda_tests:
    name: Torch Pipelines CUDA Slow Tests
    needs: setup_torch_cuda_pipeline_matrix
    strategy:
      max-parallel: 4
      fail-fast: false
      matrix:
        module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
    runs-on: [single-gpu, nvidia-gpu, t4, ci]
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: NVIDIA-SMI
        run: |
          nvidia-smi
      - name: Install dependencies
        run: |
          apt-get update && apt-get install libsndfile1-dev libgl1 -y
          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
          python -m uv pip install -e [quality,test]
          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
          python -m uv pip install hf_transfer
      - name: Environment
        run: |
          python utils/print_env.py
      - name: Slow PyTorch CUDA checkpoint tests on Ubuntu
        env:
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: :16:8
        run: |
          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
            -s -v -k "not Flax and not Onnx" \
            --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
            tests/pipelines/${{ matrix.module }}
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_pipeline_${{ matrix.module }}_cuda_stats.txt
          cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt

      - name: Test suite reports artifacts
        if: ${{ always() }}
        # The artifact name embeds the matrix module, so names stay unique as v4 requires.
        uses: actions/upload-artifact@v4
        with:
          name: pipeline_${{ matrix.module }}_test_reports
          path: reports
|
||||
@@ -1,72 +0,0 @@
|
||||
import torch
|
||||
from fa3_processor import FA3AttnProcessor
|
||||
from diffusers import DiffusionPipeline
|
||||
import argparse
|
||||
import torch.utils.benchmark as benchmark
|
||||
import gc
|
||||
import json
|
||||
|
||||
def flush():
    """Release cached memory and reset CUDA peak-memory statistics.

    Runs the Python garbage collector first so unreachable tensors are freed
    before the CUDA caching allocator is asked to return its unused blocks.
    The CUDA calls are skipped when no CUDA device is available, so the helper
    can be called safely on a CPU-only host.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        # `reset_max_memory_allocated` is deprecated in favour of
        # `reset_peak_memory_stats`, which resets all peak counters at once.
        torch.cuda.reset_peak_memory_stats()
|
||||
|
||||
def bytes_to_giga_bytes(bytes):
    """Convert a byte count to gibibytes, formatted as a string with three decimals."""
    gib = bytes / 1024 / 1024 / 1024
    return format(gib, ".3f")
|
||||
|
||||
def benchmark_fn(f, *args, **kwargs):
    """Time ``f(*args, **kwargs)`` with ``torch.utils.benchmark``.

    Returns the ``mean`` of the ``blocked_autorange`` measurement, formatted
    as a string with three decimals.
    """
    timer = benchmark.Timer(
        stmt="f(*args, **kwargs)",
        globals=dict(args=args, kwargs=kwargs, f=f),
        num_threads=torch.get_num_threads(),
    )
    measurement = timer.blocked_autorange()
    return format(measurement.mean, ".3f")
|
||||
|
||||
def load_pipeline(args):
    """Load the PixArt-Sigma pipeline in fp16 on CUDA.

    When ``args.fa3`` is truthy, the transformer and the VAE are both switched
    to a fresh ``FA3AttnProcessor``. The progress bar is always disabled.
    """
    pipe = DiffusionPipeline.from_pretrained(
        "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS", torch_dtype=torch.float16
    )
    pipe = pipe.to("cuda")

    if args.fa3:
        # Same order as before: transformer first, then VAE, one processor each.
        for component_name in ("transformer", "vae"):
            getattr(pipe, component_name).set_attn_processor(FA3AttnProcessor())

    pipe.set_progress_bar_config(disable=True)
    return pipe
|
||||
|
||||
def run_pipeline(pipeline, args):
    """Invoke ``pipeline`` once with the fixed prompt and discard its output.

    ``args.batch_size`` is forwarded as ``num_images_per_prompt``.
    """
    call_kwargs = {
        "prompt": "a cat with tiger-like looks",
        "num_images_per_prompt": args.batch_size,
        "guidance_scale": 7.5,
    }
    pipeline(**call_kwargs)
|
||||
|
||||
if __name__ == "__main__":
    # Command line: --fa3 (int flag) selects the FA3 processor, --batch_size the images per prompt.
    parser = argparse.ArgumentParser()
    parser.add_argument("--fa3", default=0, type=int)
    parser.add_argument("--batch_size", default=1, type=int)
    args = parser.parse_args()

    # Start from clean memory statistics before anything is loaded.
    flush()

    pipeline = load_pipeline(args)

    # Three untimed warm-up passes before measuring.
    warmup_runs = 3
    while warmup_runs > 0:
        run_pipeline(pipeline, args)
        warmup_runs -= 1

    elapsed = benchmark_fn(run_pipeline, pipeline, args)
    peak_memory = bytes_to_giga_bytes(torch.cuda.max_memory_allocated())
    print(f"FA3: {bool(args.fa3)} Time: {elapsed} seconds Memory: {peak_memory} GB")

    # Results and the sample image share one file-name prefix per configuration.
    filename_prefix = f"fa3@{args.fa3}-bs@{args.batch_size}"
    with open(f"{filename_prefix}.json", "w") as results_file:
        json.dump({"time": elapsed, "memory": peak_memory}, results_file)

    # Generate one sample with an explicit step count and save the first image.
    output = pipeline(
        prompt="a cat with tiger-like looks",
        num_images_per_prompt=args.batch_size,
        num_inference_steps=25,
        guidance_scale=7.5,
    )
    output.images[0].save(f"{filename_prefix}.png")
|
||||
@@ -1,95 +0,0 @@
|
||||
import torch
|
||||
from flash_attn_interface import flash_attn_func
|
||||
|
||||
class FA3AttnProcessor:
    r"""
    Processor for using Flash Attention 3 (FA3) via `flash-attn`.

    To install `flash-attn` that supports FA3, follow:
    https://github.com/Dao-AILab/flash-attention?tab=readme-ov-file#flashattention-3-beta-release

    Reference: https://tridao.me/blog/2024/flash3/

    Note:
        `flash_attn_func` is called without a mask, so `attention_mask` is accepted only for interface
        compatibility and is **not** applied. A warning is emitted the first time a mask is supplied to a
        given processor instance.
    """

    # Set on the instance after the first "mask ignored" warning, so it is emitted only once.
    _mask_warning_emitted = False

    def __call__(
        self,
        attn,
        hidden_states,
        encoder_hidden_states=None,
        attention_mask=None,
        temb=None,
        *args,
        **kwargs,
    ) -> torch.Tensor:
        r"""
        Compute attention for `attn` with `flash_attn_func`.

        Args:
            attn: The attention module whose projections, norms and scale are used.
            hidden_states: Query-side input; a 4D input is flattened to a token sequence and restored afterwards.
            encoder_hidden_states: Key/value-side input. Falls back to `hidden_states` when `None`.
            attention_mask: Ignored (see the class docstring).
            temb: Passed to `attn.spatial_norm` when that norm is set.

        Returns:
            The attention output with the residual connection and output rescaling of `attn` applied.
        """
        residual = hidden_states

        if attn.spatial_norm is not None:
            hidden_states = attn.spatial_norm(hidden_states, temb)

        input_ndim = hidden_states.ndim

        if input_ndim == 4:
            batch_size, channel, height, width = hidden_states.shape
            hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)

        batch_size, _, _ = hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape

        # The mask is never handed to `flash_attn_func` below, so preparing or expanding it would be
        # dead work. Make the drop visible once instead of silently discarding it.
        if attention_mask is not None and not self._mask_warning_emitted:
            import warnings

            warnings.warn(
                "FA3AttnProcessor does not apply `attention_mask`; the mask is ignored.",
                stacklevel=2,
            )
            self._mask_warning_emitted = True

        if attn.group_norm is not None:
            hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)

        query = attn.to_q(hidden_states)

        if encoder_hidden_states is None:
            encoder_hidden_states = hidden_states
        elif attn.norm_cross:
            encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)

        key = attn.to_k(encoder_hidden_states)
        value = attn.to_v(encoder_hidden_states)

        inner_dim = key.shape[-1]
        head_dim = inner_dim // attn.heads

        # Split the projection dimension into heads: (batch, tokens, heads, head_dim).
        query = query.view(batch_size, -1, attn.heads, head_dim).contiguous()
        key = key.view(batch_size, -1, attn.heads, head_dim).contiguous()
        value = value.view(batch_size, -1, attn.heads, head_dim).contiguous()

        # The second element of the returned tuple is not needed here.
        hidden_states, _ = flash_attn_func(query, key, value, softmax_scale=attn.scale, causal=False)
        hidden_states = hidden_states.reshape(batch_size, -1, attn.heads * head_dim)
        hidden_states = hidden_states.to(query.dtype)

        # linear proj
        hidden_states = attn.to_out[0](hidden_states)
        # dropout
        hidden_states = attn.to_out[1](hidden_states)

        if input_ndim == 4:
            hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)

        if attn.residual_connection:
            hidden_states = hidden_states + residual

        hidden_states = hidden_states / attn.rescale_output_factor

        return hidden_states
|
||||
@@ -11,7 +11,7 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from typing import Any, Dict, Optional, Union
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
@@ -19,7 +19,6 @@ from torch import nn
|
||||
from ...configuration_utils import ConfigMixin, register_to_config
|
||||
from ...utils import is_torch_version, logging
|
||||
from ..attention import BasicTransformerBlock
|
||||
from ..attention_processor import AttentionProcessor
|
||||
from ..embeddings import PatchEmbed, PixArtAlphaTextProjection
|
||||
from ..modeling_outputs import Transformer2DModelOutput
|
||||
from ..modeling_utils import ModelMixin
|
||||
@@ -187,64 +186,6 @@ class PixArtTransformer2DModel(ModelMixin, ConfigMixin):
|
||||
if hasattr(module, "gradient_checkpointing"):
|
||||
module.gradient_checkpointing = value
|
||||
|
||||
@property
def attn_processors(self) -> Dict[str, AttentionProcessor]:
    r"""
    Returns:
        `dict` of attention processors: every processor found in the model's module tree, keyed by the
        dotted module path followed by `.processor`.
    """
    collected = {}

    def _collect(prefix, module):
        # A module exposing `get_processor` contributes one entry under its own path.
        if hasattr(module, "get_processor"):
            collected[f"{prefix}.processor"] = module.get_processor()

        # Descend regardless, extending the dotted path with each child name.
        for child_name, child in module.named_children():
            _collect(f"{prefix}.{child_name}", child)

    for top_name, top_module in self.named_children():
        _collect(top_name, top_module)

    return collected
|
||||
|
||||
def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
    r"""
    Sets the attention processor to use to compute attention.

    Parameters:
        processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
            The instantiated processor class or a dictionary of processor classes that will be set as the processor
            for **all** `Attention` layers.

            If `processor` is a dict, the key needs to define the path to the corresponding cross attention
            processor. This is strongly recommended when setting trainable attention processors.

            A dict passed here is only read; it is not modified.

    Raises:
        ValueError: If a dict is passed whose size differs from the number of attention processors in the model.
        KeyError: If a dict is passed that lacks the `"<module path>.processor"` key of some attention layer.
    """
    count = len(self.attn_processors.keys())
    per_layer = isinstance(processor, dict)

    if per_layer and len(processor) != count:
        raise ValueError(
            f"A dict of processors was passed, but the number of processors {len(processor)} does not match the"
            f" number of attention layers: {count}. Please make sure to pass {count} processor classes."
        )

    def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
        if hasattr(module, "set_processor"):
            if not per_layer:
                module.set_processor(processor)
            else:
                # Look the entry up instead of popping it, so the caller's dict is left intact.
                module.set_processor(processor[f"{name}.processor"])

        for sub_name, child in module.named_children():
            fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)

    for name, module in self.named_children():
        fn_recursive_attn_processor(name, module, processor)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
hidden_states: torch.Tensor,
|
||||
|
||||
@@ -76,7 +76,6 @@ def fetch_pipeline_modules_to_test():
|
||||
test_modules = []
|
||||
for pipeline_name in pipeline_objects:
|
||||
module = getattr(diffusers, pipeline_name)
|
||||
|
||||
test_module = module.__module__.split(".")[-2].strip()
|
||||
test_modules.append(test_module)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user