Compare commits

...

3 Commits

Author SHA1 Message Date
DN6 e7697e7a0a update 2025-01-14 12:07:45 +05:30
hlky 4a4afd5ece Fix batch > 1 in HunyuanVideo (#10548) 2025-01-14 10:25:06 +05:30
Aryan aa79d7da46 Test sequential cpu offload for torchao quantization (#10506)
test sequential cpu offload
2025-01-14 09:54:06 +05:30
3 changed files with 15 additions and 2 deletions
+1 -1
View File
@@ -83,7 +83,7 @@ jobs:
python utils/print_env.py
- name: PyTorch CUDA checkpoint tests on Ubuntu
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
CUBLAS_WORKSPACE_CONFIG: :16:8
run: |
@@ -727,7 +727,8 @@ class HunyuanVideoTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin,
for i in range(batch_size):
attention_mask[i, : effective_sequence_length[i]] = True
attention_mask = attention_mask.unsqueeze(1) # [B, 1, N], for broadcasting across attention heads
# [B, 1, 1, N], for broadcasting across attention heads
attention_mask = attention_mask.unsqueeze(1).unsqueeze(1)
# 4. Transformer blocks
if torch.is_grad_enabled() and self.gradient_checkpointing:
@@ -476,6 +476,18 @@ class TorchAoTest(unittest.TestCase):
with self.assertRaises(ValueError):
self.get_dummy_components(TorchAoConfig("int42"))
def test_sequential_cpu_offload(self):
r"""
A test that checks if inference runs as expected when sequential cpu offloading is enabled.
"""
quantization_config = TorchAoConfig("int8wo")
components = self.get_dummy_components(quantization_config)
pipe = FluxPipeline(**components)
pipe.enable_sequential_cpu_offload()
inputs = self.get_dummy_inputs(torch_device)
_ = pipe(**inputs)
# Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners
@require_torch