update

Merge branch 'main' into shm-size
2024-07-15 12:57:28 +00:00 · 2024-07-15 12:51:14 +00:00 · 2024-07-04 09:12:16 +00:00 · 2024-06-04 08:33:22 +00:00 · 2024-06-03 04:52:47 +00:00 · 2024-06-03 04:51:32 +00:00
5 changed files with 101 additions and 228 deletions
@@ -0,0 +1,100 @@
+name: Slow Test Memory Checks
+
+on:
+  push:
+    branches: [ shm-size ]
+
+env:
+  DIFFUSERS_IS_CI: yes
+  HF_HUB_ENABLE_HF_TRANSFER: 1
+  OMP_NUM_THREADS: 8
+  MKL_NUM_THREADS: 8
+  PYTEST_TIMEOUT: 600
+  RUN_SLOW: yes
+  PIPELINE_USAGE_CUTOFF: 50000
+
+jobs:
+  setup_torch_cuda_pipeline_matrix:
+    name: Setup Torch Pipelines CUDA Slow Tests Matrix
+    runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
+    container:
+      image: diffusers/diffusers-pytorch-cpu
+    outputs:
+      pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
+    steps:
+      - name: Checkout diffusers
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+      - name: Install dependencies
+        run: |
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+          python -m uv pip install -e [quality,test]
+      - name: Environment
+        run: |
+          python utils/print_env.py
+      - name: Fetch Pipeline Matrix
+        id: fetch_pipeline_matrix
+        run: |
+          matrix=$(python utils/fetch_torch_cuda_pipeline_test_matrix.py)
+          echo $matrix
+          echo "pipeline_test_matrix=$matrix" >> $GITHUB_OUTPUT
+      - name: Pipeline Tests Artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: test-pipelines.json
+          path: reports
+
+  torch_pipelines_cuda_tests:
+    name: Torch Pipelines CUDA Slow Tests
+    needs: setup_torch_cuda_pipeline_matrix
+    strategy:
+      max-parallel: 4
+      fail-fast: false
+      matrix:
+        module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
+    container:
+      image: diffusers/diffusers-pytorch-cuda
+      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0
+    steps:
+      - name: Checkout diffusers
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+      - name: Install dependencies
+        run: |
+          apt-get update && apt-get install libsndfile1-dev libgl1 -y
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+          python -m uv pip install -e [quality,test]
+          python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install hf_transfer
+      - name: Environment
+        run: |
+          python utils/print_env.py
+      - name: Slow PyTorch CUDA checkpoint tests on Ubuntu
+        env:
+          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
+          CUBLAS_WORKSPACE_CONFIG: :16:8
+        run: |
+          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+            -s -v -k "not Flax and not Onnx" \
+            --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
+            tests/pipelines/${{ matrix.module }}
+      - name: Failure short reports
+        if: ${{ failure() }}
+        run: |
+          cat reports/tests_pipeline_${{ matrix.module }}_cuda_stats.txt
+          cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt
+
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: pipeline_${{ matrix.module }}_test_reports
+          path: reports
@@ -1,72 +0,0 @@
-import torch 
-from fa3_processor import FA3AttnProcessor
-from diffusers import DiffusionPipeline
-import argparse
-import torch.utils.benchmark as benchmark
-import gc
-import json
-
-def flush():
-    gc.collect()
-    torch.cuda.empty_cache()
-    torch.cuda.reset_max_memory_allocated()
-    torch.cuda.reset_peak_memory_stats()
-
-def bytes_to_giga_bytes(bytes):
-    return f"{(bytes / 1024 / 1024 / 1024):.3f}"
-
-def benchmark_fn(f, *args, **kwargs):
-    t0 = benchmark.Timer(
-        stmt="f(*args, **kwargs)",
-        globals={"args": args, "kwargs": kwargs, "f": f},
-        num_threads=torch.get_num_threads(),
-    )
-    return f"{(t0.blocked_autorange().mean):.3f}"
-
-def load_pipeline(args):
-    pipeline = DiffusionPipeline.from_pretrained(
-        "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS", torch_dtype=torch.float16
-    ).to("cuda")
-    if args.fa3:
-        pipeline.transformer.set_attn_processor(FA3AttnProcessor())
-        pipeline.vae.set_attn_processor(FA3AttnProcessor())
-
-    pipeline.set_progress_bar_config(disable=True)
-    return pipeline 
-
-def run_pipeline(pipeline, args):
-    _ = pipeline(
-        prompt="a cat with tiger-like looks", 
-        num_images_per_prompt=args.batch_size, 
-        guidance_scale=7.5
-    )
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--fa3", default=0, type=int)
-    parser.add_argument("--batch_size", default=1, type=int)
-    args = parser.parse_args()
-
-    flush()
-
-    pipeline = load_pipeline(args)
-
-    for _ in range(3):
-        run_pipeline(pipeline, args)
-
-    time = benchmark_fn(run_pipeline, pipeline, args)
-    memory = bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) 
-    data_dict = dict(time=time, memory=memory)
-    print(f"FA3: {bool(args.fa3)} Time: {time} seconds Memory: {memory} GB")
-
-    filename_prefix = f"fa3@{args.fa3}-bs@{args.batch_size}"
-    with open(f"{filename_prefix}.json", "w") as f:
-        json.dump(data_dict, f)
-
-    image = pipeline(
-        prompt="a cat with tiger-like looks", 
-        num_images_per_prompt=args.batch_size, 
-        num_inference_steps=25, 
-        guidance_scale=7.5
-    ).images[0]
-    image.save(f"{filename_prefix}.png")
@@ -1,95 +0,0 @@
-import torch
-from flash_attn_interface import flash_attn_func
-
-class FA3AttnProcessor:
-    r"""
-    Processor for using Flash Attention 3 (FA3) via `flash-attn`.
-
-    To install `flash-attn` that supports FA3, follow: 
-    https://github.com/Dao-AILab/flash-attention?tab=readme-ov-file#flashattention-3-beta-release
-
-    Reference: https://tridao.me/blog/2024/flash3/
-    """
-    def __call__(
-        self,
-        attn,
-        hidden_states,
-        encoder_hidden_states=None,
-        attention_mask=None,
-        temb=None,
-        *args,
-        **kwargs,
-    ) -> torch.Tensor:
-        residual = hidden_states
-
-        if attn.spatial_norm is not None:
-            hidden_states = attn.spatial_norm(hidden_states, temb)
-
-        input_ndim = hidden_states.ndim
-
-        if input_ndim == 4:
-            batch_size, channel, height, width = hidden_states.shape
-            hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)
-
-        batch_size, key_tokens, _ = (
-            hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
-        )
-
-        attention_mask = attn.prepare_attention_mask(attention_mask, key_tokens, batch_size)
-        if attention_mask is not None:
-            # expand our mask's singleton query_tokens dimension:
-            #   [batch*heads,            1, key_tokens] ->
-            #   [batch*heads, query_tokens, key_tokens]
-            # so that it can be added as a bias onto the attention scores that xformers computes:
-            #   [batch*heads, query_tokens, key_tokens]
-            # we do this explicitly because xformers doesn't broadcast the singleton dimension for us.
-            _, query_tokens, _ = hidden_states.shape
-            attention_mask = attention_mask.expand(-1, query_tokens, -1)
-
-        if attn.group_norm is not None:
-            hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
-
-        query = attn.to_q(hidden_states)
-
-        if encoder_hidden_states is None:
-            encoder_hidden_states = hidden_states
-        elif attn.norm_cross:
-            encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)
-
-        key = attn.to_k(encoder_hidden_states)
-        value = attn.to_v(encoder_hidden_states)
-
-        inner_dim = key.shape[-1]
-        head_dim = inner_dim // attn.heads
-
-        query = query.view(batch_size, -1, attn.heads, head_dim).contiguous()
-        key = key.view(batch_size, -1, attn.heads, head_dim).contiguous()
-        value = value.view(batch_size, -1, attn.heads, head_dim).contiguous()
-
-        # nasty hack to make the head number and head dim compatible with FA3.
-        # if attn.heads ==1 and head_dim == 512:
-        #     factor = 8
-        #     new_head_dim = head_dim // factor
-        #     query = query.view(batch_size, -1, factor, new_head_dim)
-        #     key = key.view(batch_size, -1, factor, new_head_dim)
-        #     value = value.view(batch_size, -1, factor, new_head_dim)
-        hidden_states, _ = flash_attn_func(
-            query, key, value, softmax_scale=attn.scale, causal=False
-        )
-        hidden_states = hidden_states.reshape(batch_size, -1, attn.heads * head_dim)
-        hidden_states = hidden_states.to(query.dtype)
-
-        # linear proj
-        hidden_states = attn.to_out[0](hidden_states)
-        # dropout
-        hidden_states = attn.to_out[1](hidden_states)
-
-        if input_ndim == 4:
-            hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)
-
-        if attn.residual_connection:
-            hidden_states = hidden_states + residual
-
-        hidden_states = hidden_states / attn.rescale_output_factor
-
-        return hidden_states
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional

 import torch
 from torch import nn
@@ -19,7 +19,6 @@ from torch import nn
 from ...configuration_utils import ConfigMixin, register_to_config
 from ...utils import is_torch_version, logging
 from ..attention import BasicTransformerBlock
-from ..attention_processor import AttentionProcessor
 from ..embeddings import PatchEmbed, PixArtAlphaTextProjection
 from ..modeling_outputs import Transformer2DModelOutput
 from ..modeling_utils import ModelMixin
@@ -187,64 +186,6 @@ class PixArtTransformer2DModel(ModelMixin, ConfigMixin):
        if hasattr(module, "gradient_checkpointing"):
            module.gradient_checkpointing = value

-    @property
-    def attn_processors(self) -> Dict[str, AttentionProcessor]:
-        r"""
-        Returns:
-            `dict` of attention processors: A dictionary containing all attention processors used in the model with
-            indexed by its weight name.
-        """
-        # set recursively
-        processors = {}
-
-        def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
-            if hasattr(module, "get_processor"):
-                processors[f"{name}.processor"] = module.get_processor()
-
-            for sub_name, child in module.named_children():
-                fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
-
-            return processors
-
-        for name, module in self.named_children():
-            fn_recursive_add_processors(name, module, processors)
-
-        return processors
-
-    def set_attn_processor(self, processor: Union[AttentionProcessor, Dict[str, AttentionProcessor]]):
-        r"""
-        Sets the attention processor to use to compute attention.
-
-        Parameters:
-            processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
-                The instantiated processor class or a dictionary of processor classes that will be set as the processor
-                for **all** `Attention` layers.
-
-                If `processor` is a dict, the key needs to define the path to the corresponding cross attention
-                processor. This is strongly recommended when setting trainable attention processors.
-
-        """
-        count = len(self.attn_processors.keys())
-
-        if isinstance(processor, dict) and len(processor) != count:
-            raise ValueError(
-                f"A dict of processors was passed, but the number of processors {len(processor)} does not match the"
-                f" number of attention layers: {count}. Please make sure to pass {count} processor classes."
-            )
-
-        def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor):
-            if hasattr(module, "set_processor"):
-                if not isinstance(processor, dict):
-                    module.set_processor(processor)
-                else:
-                    module.set_processor(processor.pop(f"{name}.processor"))
-
-            for sub_name, child in module.named_children():
-                fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor)
-
-        for name, module in self.named_children():
-            fn_recursive_attn_processor(name, module, processor)
-
    def forward(
        self,
        hidden_states: torch.Tensor,
@@ -76,7 +76,6 @@ def fetch_pipeline_modules_to_test():
    test_modules = []
    for pipeline_name in pipeline_objects:
        module = getattr(diffusers, pipeline_name)
-
        test_module = module.__module__.split(".")[-2].strip()
        test_modules.append(test_module)
Author	SHA1	Message	Date
Dhruv Nair	390d0879fe	update	2024-07-15 12:57:28 +00:00
Dhruv Nair	1fb8082af9	Merge branch 'main' into shm-size	2024-07-15 12:51:14 +00:00
Dhruv Nair	54e6f2d99e	Merge branch 'main' into shm-size	2024-07-04 09:12:16 +00:00
Dhruv Nair	22326b4971	update	2024-06-04 08:33:22 +00:00
Dhruv Nair	7cc0ba0070	update	2024-06-03 04:52:47 +00:00
Dhruv Nair	e4bd3f5a05	update	2024-06-03 04:51:32 +00:00
Dhruv Nair	25d0277911	change runner type	2024-05-07 07:21:49 +00:00
Dhruv Nair	14d7fe3f9e	update	2024-05-06 17:21:06 +00:00
Dhruv Nair	2f4e29d179	update	2024-05-06 16:28:50 +00:00
Dhruv Nair	b28311d9e5	update	2024-05-06 16:12:36 +00:00
Dhruv Nair	8a1020c91e	update	2024-05-06 16:07:35 +00:00
Dhruv Nair	2483c57c44	update	2024-05-06 15:21:02 +00:00
Dhruv Nair	f18942b3e6	update	2024-05-06 14:24:29 +00:00
Dhruv Nair	7021a35eeb	update	2024-05-06 13:06:44 +00:00
Dhruv Nair	e0455537c2	update	2024-05-06 09:00:56 +00:00
Dhruv Nair	c61360099b	update	2024-05-06 08:36:40 +00:00
Dhruv Nair	b585832d4e	update	2024-05-06 08:19:11 +00:00
Dhruv Nair	963c73c229	update	2024-05-06 06:10:01 +00:00
Dhruv Nair	09c595f212	update	2024-05-06 06:06:27 +00:00
Dhruv Nair	b1c5030418	update	2024-05-06 06:01:03 +00:00
Dhruv Nair	17e801ccb9	Merge branch 'main' into shm-size	2024-05-06 05:59:20 +00:00
Dhruv Nair	468008f6b7	update	2024-04-30 12:45:45 +00:00
Dhruv Nair	10e7d39f58	update	2024-04-30 12:40:00 +00:00
Dhruv Nair	49c01d4a4b	update	2024-04-30 12:37:45 +00:00
Dhruv Nair	f2e6c24df2	update	2024-04-30 12:35:56 +00:00
Dhruv Nair	ce659bc586	Merge branch 'main' into shm-size	2024-04-01 13:03:06 +00:00
Dhruv Nair	615c12ab68	Merge branch 'main' into shm-size	2024-03-28 05:15:41 +00:00
Dhruv Nair	e34d9f1949	update	2024-03-22 13:37:48 +00:00
Dhruv Nair	4106e3f182	update	2024-03-22 12:45:44 +00:00
Dhruv Nair	1651c9a1a1	update	2024-03-22 12:08:44 +00:00
Dhruv Nair	4bfdb34b36	update	2024-03-22 11:39:09 +00:00
Dhruv Nair	3d4f987cc9	update	2024-03-22 11:34:21 +00:00
Dhruv Nair	1feac0469b	update	2024-03-22 11:25:20 +00:00
Dhruv Nair	75e5cd046b	update	2024-03-22 09:32:23 +00:00
Dhruv Nair	865b6638f7	update	2024-03-22 09:26:12 +00:00
Dhruv Nair	428c952289	update	2024-03-22 08:11:16 +00:00
Dhruv Nair	d16c921346	update	2024-03-22 08:10:50 +00:00
Dhruv Nair	c46380165a	update	2024-03-22 07:42:00 +00:00
Dhruv Nair	a6a89aa199	update	2024-03-22 07:26:42 +00:00
Dhruv Nair	9e9a49ca24	update	2024-03-21 08:39:27 +00:00
Dhruv Nair	b9c90f7e22	update	2024-03-21 08:24:15 +00:00
Dhruv Nair	19fc3281c5	update	2024-03-21 08:18:44 +00:00
Dhruv Nair	fe8f0c9a76	update	2024-03-21 07:08:14 +00:00
Dhruv Nair	dc0bef11bd	Merge branch 'shm-size' of https://github.com/huggingface/diffusers into shm-size	2024-03-20 11:09:30 +00:00
Dhruv Nair	27ef43f043	update	2024-03-20 11:09:21 +00:00
Dhruv Nair	5d5c61bd09	Merge branch 'main' into shm-size	2024-03-20 16:36:40 +05:30
Dhruv Nair	7c38bff847	update	2024-03-20 10:36:10 +00:00