update

Fix batch > 1 in HunyuanVideo (#10548)
Test sequential cpu offload for torchao quantization (#10506)
2025-01-14 12:07:45 +05:30 · 2025-01-14 10:25:06 +05:30 · 2025-01-14 09:54:06 +05:30
3 changed files with 15 additions and 2 deletions
@@ -83,7 +83,7 @@ jobs:
          python utils/print_env.py
      - name: PyTorch CUDA checkpoint tests on Ubuntu
        env:
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: :16:8
        run: |
@@ -727,7 +727,8 @@ class HunyuanVideoTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin,

        for i in range(batch_size):
            attention_mask[i, : effective_sequence_length[i]] = True
-        attention_mask = attention_mask.unsqueeze(1)  # [B, 1, N], for broadcasting across attention heads
+        # [B, 1, 1, N], for broadcasting across attention heads
+        attention_mask = attention_mask.unsqueeze(1).unsqueeze(1)

        # 4. Transformer blocks
        if torch.is_grad_enabled() and self.gradient_checkpointing:
@@ -476,6 +476,18 @@ class TorchAoTest(unittest.TestCase):
        with self.assertRaises(ValueError):
            self.get_dummy_components(TorchAoConfig("int42"))

+    def test_sequential_cpu_offload(self):
+        r"""
+        A test that checks if inference runs as expected when sequential cpu offloading is enabled.
+        """
+        quantization_config = TorchAoConfig("int8wo")
+        components = self.get_dummy_components(quantization_config)
+        pipe = FluxPipeline(**components)
+        pipe.enable_sequential_cpu_offload()
+
+        inputs = self.get_dummy_inputs(torch_device)
+        _ = pipe(**inputs)
+

 # Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners
@require_torch
Author	SHA1	Message	Date
DN6	e7697e7a0a	update	2025-01-14 12:07:45 +05:30
hlky	4a4afd5ece	Fix batch > 1 in HunyuanVideo (#10548)	2025-01-14 10:25:06 +05:30
Aryan	aa79d7da46	Test sequential cpu offload for torchao quantization (#10506) test sequential cpu offload	2025-01-14 09:54:06 +05:30