diff --git a/docs/source/en/api/pipelines/animatediff.md b/docs/source/en/api/pipelines/animatediff.md index ed5ced7dbb..b4d347dc6a 100644 --- a/docs/source/en/api/pipelines/animatediff.md +++ b/docs/source/en/api/pipelines/animatediff.md @@ -966,7 +966,7 @@ pipe.to("cuda") prompt = { 0: "A caterpillar on a leaf, high quality, photorealistic", 40: "A caterpillar transforming into a cocoon, on a leaf, near flowers, photorealistic", - 80: "A cocoon on a leaf, flowers in the backgrond, photorealistic", + 80: "A cocoon on a leaf, flowers in the background, photorealistic", 120: "A cocoon maturing and a butterfly being born, flowers and leaves visible in the background, photorealistic", 160: "A beautiful butterfly, vibrant colors, sitting on a leaf, flowers in the background, photorealistic", 200: "A beautiful butterfly, flying away in a forest, photorealistic", diff --git a/docs/source/en/api/pipelines/ledits_pp.md b/docs/source/en/api/pipelines/ledits_pp.md index 0dc4b536ab..7c08971aa8 100644 --- a/docs/source/en/api/pipelines/ledits_pp.md +++ b/docs/source/en/api/pipelines/ledits_pp.md @@ -29,7 +29,7 @@ You can find additional information about LEDITS++ on the [project page](https:/ -Due to some backward compatability issues with the current diffusers implementation of [`~schedulers.DPMSolverMultistepScheduler`] this implementation of LEdits++ can no longer guarantee perfect inversion. +Due to some backward compatibility issues with the current diffusers implementation of [`~schedulers.DPMSolverMultistepScheduler`] this implementation of LEdits++ can no longer guarantee perfect inversion. This issue is unlikely to have any noticeable effects on applied use-cases. However, we provide an alternative implementation that guarantees perfect inversion in a dedicated [GitHub repo](https://github.com/ml-research/ledits_pp). 
diff --git a/docs/source/en/api/pipelines/wan.md b/docs/source/en/api/pipelines/wan.md index dbf3b973d7..09503125f5 100644 --- a/docs/source/en/api/pipelines/wan.md +++ b/docs/source/en/api/pipelines/wan.md @@ -285,7 +285,7 @@ pipe = WanImageToVideoPipeline.from_pretrained( image_encoder=image_encoder, torch_dtype=torch.bfloat16 ) -# Since we've offloaded the larger models alrady, we can move the rest of the model components to GPU +# Since we've offloaded the larger models already, we can move the rest of the model components to GPU pipe.to("cuda") image = load_image( @@ -368,7 +368,7 @@ pipe = WanImageToVideoPipeline.from_pretrained( image_encoder=image_encoder, torch_dtype=torch.bfloat16 ) -# Since we've offloaded the larger models alrady, we can move the rest of the model components to GPU +# Since we've offloaded the larger models already, we can move the rest of the model components to GPU pipe.to("cuda") image = load_image( diff --git a/docs/source/en/using-diffusers/inference_with_lcm.md b/docs/source/en/using-diffusers/inference_with_lcm.md index 02b0a9bda3..dba4aeb4f6 100644 --- a/docs/source/en/using-diffusers/inference_with_lcm.md +++ b/docs/source/en/using-diffusers/inference_with_lcm.md @@ -485,7 +485,7 @@ image = image[:, :, None] image = np.concatenate([image, image, image], axis=2) canny_image = Image.fromarray(image).resize((1024, 1216)) -adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, varient="fp16").to("cuda") +adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, variant="fp16").to("cuda") unet = UNet2DConditionModel.from_pretrained( "latent-consistency/lcm-sdxl", @@ -551,7 +551,7 @@ image = image[:, :, None] image = np.concatenate([image, image, image], axis=2) canny_image = Image.fromarray(image).resize((1024, 1024)) -adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, 
varient="fp16").to("cuda") +adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, variant="fp16").to("cuda") pipe = StableDiffusionXLAdapterPipeline.from_pretrained( "stabilityai/stable-diffusion-xl-base-1.0", diff --git a/docs/source/en/using-diffusers/pag.md b/docs/source/en/using-diffusers/pag.md index 26961d959c..1af690f86a 100644 --- a/docs/source/en/using-diffusers/pag.md +++ b/docs/source/en/using-diffusers/pag.md @@ -154,11 +154,11 @@ pipeline = AutoPipelineForInpainting.from_pretrained( pipeline.enable_model_cpu_offload() ``` -You can enable PAG on an exisiting inpainting pipeline like this +You can enable PAG on an existing inpainting pipeline like this ```py -pipeline_inpaint = AutoPipelineForInpaiting.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16) -pipeline = AutoPipelineForInpaiting.from_pipe(pipeline_inpaint, enable_pag=True) +pipeline_inpaint = AutoPipelineForInpainting.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16) +pipeline = AutoPipelineForInpainting.from_pipe(pipeline_inpaint, enable_pag=True) ``` This still works when your pipeline has a different task: diff --git a/examples/advanced_diffusion_training/README.md b/examples/advanced_diffusion_training/README.md index 504ae1471f..f30f8c83a1 100644 --- a/examples/advanced_diffusion_training/README.md +++ b/examples/advanced_diffusion_training/README.md @@ -125,7 +125,7 @@ Now we'll simply specify the name of the dataset and caption column (in this cas ``` You can also load a dataset straight from by specifying it's name in `dataset_name`. -Look [here](https://huggingface.co/blog/sdxl_lora_advanced_script#custom-captioning) for more info on creating/loadin your own caption dataset. +Look [here](https://huggingface.co/blog/sdxl_lora_advanced_script#custom-captioning) for more info on creating/loading your own caption dataset. 
- **optimizer**: for this example, we'll use [prodigy](https://huggingface.co/blog/sdxl_lora_advanced_script#adaptive-optimizers) - an adaptive optimizer - **pivotal tuning** @@ -404,7 +404,7 @@ The advanced script now supports custom choice of U-net blocks to train during D > In light of this, we're introducing a new feature to the advanced script to allow for configurable U-net learned blocks. **Usage** -Configure LoRA learned U-net blocks adding a `lora_unet_blocks` flag, with a comma seperated string specifying the targeted blocks. +Configure LoRA learned U-net blocks adding a `lora_unet_blocks` flag, with a comma separated string specifying the targeted blocks. e.g: ```bash --lora_unet_blocks="unet.up_blocks.0.attentions.0,unet.up_blocks.0.attentions.1" diff --git a/examples/advanced_diffusion_training/README_flux.md b/examples/advanced_diffusion_training/README_flux.md index f2a571d5ea..ded6f11314 100644 --- a/examples/advanced_diffusion_training/README_flux.md +++ b/examples/advanced_diffusion_training/README_flux.md @@ -141,7 +141,7 @@ Now we'll simply specify the name of the dataset and caption column (in this cas ``` You can also load a dataset straight from by specifying it's name in `dataset_name`. -Look [here](https://huggingface.co/blog/sdxl_lora_advanced_script#custom-captioning) for more info on creating/loadin your own caption dataset. +Look [here](https://huggingface.co/blog/sdxl_lora_advanced_script#custom-captioning) for more info on creating/loading your own caption dataset. - **optimizer**: for this example, we'll use [prodigy](https://huggingface.co/blog/sdxl_lora_advanced_script#adaptive-optimizers) - an adaptive optimizer - **pivotal tuning** diff --git a/examples/amused/README.md b/examples/amused/README.md index 1230bd8667..9d5ae17ef5 100644 --- a/examples/amused/README.md +++ b/examples/amused/README.md @@ -1,6 +1,6 @@ ## Amused training -Amused can be finetuned on simple datasets relatively cheaply and quickly. 
Using 8bit optimizers, lora, and gradient accumulation, amused can be finetuned with as little as 5.5 GB. Here are a set of examples for finetuning amused on some relatively simple datasets. These training recipies are aggressively oriented towards minimal resources and fast verification -- i.e. the batch sizes are quite low and the learning rates are quite high. For optimal quality, you will probably want to increase the batch sizes and decrease learning rates. +Amused can be finetuned on simple datasets relatively cheaply and quickly. Using 8bit optimizers, lora, and gradient accumulation, amused can be finetuned with as little as 5.5 GB. Here are a set of examples for finetuning amused on some relatively simple datasets. These training recipes are aggressively oriented towards minimal resources and fast verification -- i.e. the batch sizes are quite low and the learning rates are quite high. For optimal quality, you will probably want to increase the batch sizes and decrease learning rates. All training examples use fp16 mixed precision and gradient checkpointing. We don't show 8 bit adam + lora as its about the same memory use as just using lora (bitsandbytes uses full precision optimizer states for weights below a minimum size). diff --git a/examples/cogvideo/README.md b/examples/cogvideo/README.md index 02887faeaa..6cb0a51e9f 100644 --- a/examples/cogvideo/README.md +++ b/examples/cogvideo/README.md @@ -201,7 +201,7 @@ Note that setting the `` is not necessary. From some limited experimen > - The original repository uses a `lora_alpha` of `1`. We found this not suitable in many runs, possibly due to difference in modeling backends and training settings. Our recommendation is to set to the `lora_alpha` to either `rank` or `rank // 2`. > - If you're training on data whose captions generate bad results with the original model, a `rank` of 64 and above is good and also the recommendation by the team behind CogVideoX. 
If the generations are already moderately good on your training captions, a `rank` of 16/32 should work. We found that setting the rank too low, say `4`, is not ideal and doesn't produce promising results. > - The authors of CogVideoX recommend 4000 training steps and 100 training videos overall to achieve the best result. While that might yield the best results, we found from our limited experimentation that 2000 steps and 25 videos could also be sufficient. -> - When using the Prodigy opitimizer for training, one can follow the recommendations from [this](https://huggingface.co/blog/sdxl_lora_advanced_script) blog. Prodigy tends to overfit quickly. From my very limited testing, I found a learning rate of `0.5` to be suitable in addition to `--prodigy_use_bias_correction`, `prodigy_safeguard_warmup` and `--prodigy_decouple`. +> - When using the Prodigy optimizer for training, one can follow the recommendations from [this](https://huggingface.co/blog/sdxl_lora_advanced_script) blog. Prodigy tends to overfit quickly. From my very limited testing, I found a learning rate of `0.5` to be suitable in addition to `--prodigy_use_bias_correction`, `prodigy_safeguard_warmup` and `--prodigy_decouple`. > - The recommended learning rate by the CogVideoX authors and from our experimentation with Adam/AdamW is between `1e-3` and `1e-4` for a dataset of 25+ videos. > > Note that our testing is not exhaustive due to limited time for exploration. Our recommendation would be to play around with the different knobs and dials to find the best settings for your data. 
diff --git a/examples/cogvideo/train_cogvideox_image_to_video_lora.py b/examples/cogvideo/train_cogvideox_image_to_video_lora.py index af69d45974..642aecabf7 100644 --- a/examples/cogvideo/train_cogvideox_image_to_video_lora.py +++ b/examples/cogvideo/train_cogvideox_image_to_video_lora.py @@ -879,7 +879,7 @@ def prepare_rotary_positional_embeddings( def get_optimizer(args, params_to_optimize, use_deepspeed: bool = False): - # Use DeepSpeed optimzer + # Use DeepSpeed optimizer if use_deepspeed: from accelerate.utils import DummyOptim diff --git a/examples/cogvideo/train_cogvideox_lora.py b/examples/cogvideo/train_cogvideox_lora.py index 71f9bcc61b..e737ce7624 100644 --- a/examples/cogvideo/train_cogvideox_lora.py +++ b/examples/cogvideo/train_cogvideox_lora.py @@ -901,7 +901,7 @@ def prepare_rotary_positional_embeddings( def get_optimizer(args, params_to_optimize, use_deepspeed: bool = False): - # Use DeepSpeed optimzer + # Use DeepSpeed optimizer if use_deepspeed: from accelerate.utils import DummyOptim diff --git a/examples/community/README.md b/examples/community/README.md index 3b1218dc27..3117070b20 100644 --- a/examples/community/README.md +++ b/examples/community/README.md @@ -4865,7 +4865,7 @@ python -m pip install intel_extension_for_pytorch ``` python -m pip install intel_extension_for_pytorch== -f https://developer.intel.com/ipex-whl-stable-cpu ``` -2. After pipeline initialization, `prepare_for_ipex()` should be called to enable IPEX accelaration. Supported inference datatypes are Float32 and BFloat16. +2. After pipeline initialization, `prepare_for_ipex()` should be called to enable IPEX acceleration. Supported inference datatypes are Float32 and BFloat16. 
```python pipe = AnimateDiffPipelineIpex.from_pretrained(base, motion_adapter=adapter, torch_dtype=dtype).to(device) diff --git a/examples/community/dps_pipeline.py b/examples/community/dps_pipeline.py index 7b349f6693..5442bcc651 100755 --- a/examples/community/dps_pipeline.py +++ b/examples/community/dps_pipeline.py @@ -336,13 +336,13 @@ if __name__ == "__main__": expanded_kernel_width = np.ceil(kernel_width) + 2 # Determine a set of field_of_view for each each output position, these are the pixels in the input image - # that the pixel in the output image 'sees'. We get a matrix whos horizontal dim is the output pixels (big) and the + # that the pixel in the output image 'sees'. We get a matrix whose horizontal dim is the output pixels (big) and the # vertical dim is the pixels it 'sees' (kernel_size + 2) field_of_view = np.squeeze( np.int16(np.expand_dims(left_boundary, axis=1) + np.arange(expanded_kernel_width) - 1) ) - # Assign weight to each pixel in the field of view. A matrix whos horizontal dim is the output pixels and the + # Assign weight to each pixel in the field of view. A matrix whose horizontal dim is the output pixels and the # vertical dim is a list of weights matching to the pixel in the field of view (that are specified in # 'field_of_view') weights = fixed_kernel(1.0 * np.expand_dims(match_coordinates, axis=1) - field_of_view - 1) diff --git a/examples/community/hd_painter.py b/examples/community/hd_painter.py index 9711b40b11..20bb43a76f 100644 --- a/examples/community/hd_painter.py +++ b/examples/community/hd_painter.py @@ -201,16 +201,16 @@ class PAIntAAttnProcessor: # ================================================== # # We use a hack by running the code from the BasicTransformerBlock that is between Self and Cross attentions here # The other option would've been modifying the BasicTransformerBlock and adding this functionality here. 
- # I assumed that changing the BasicTransformerBlock would have been a bigger deal and decided to use this hack isntead. + # I assumed that changing the BasicTransformerBlock would have been a bigger deal and decided to use this hack instead. - # The SelfAttention block recieves the normalized latents from the BasicTransformerBlock, + # The SelfAttention block receives the normalized latents from the BasicTransformerBlock, # But the residual of the output is the non-normalized version. # Therefore we unnormalize the input hidden state here unnormalized_input_hidden_states = ( input_hidden_states + self.transformer_block.norm1.bias ) * self.transformer_block.norm1.weight - # TODO: return if neccessary + # TODO: return if necessary # if self.use_ada_layer_norm_zero: # attn_output = gate_msa.unsqueeze(1) * attn_output # elif self.use_ada_layer_norm_single: @@ -220,7 +220,7 @@ class PAIntAAttnProcessor: if transformer_hidden_states.ndim == 4: transformer_hidden_states = transformer_hidden_states.squeeze(1) - # TODO: return if neccessary + # TODO: return if necessary # 2.5 GLIGEN Control # if gligen_kwargs is not None: # transformer_hidden_states = self.fuser(transformer_hidden_states, gligen_kwargs["objs"]) @@ -266,7 +266,7 @@ class PAIntAAttnProcessor: ) = cross_attention_input_hidden_states.chunk(2) # Same split for the encoder_hidden_states i.e. the tokens - # Since the SelfAttention processors don't get the encoder states as input, we inject them into the processor in the begining. + # Since the SelfAttention processors don't get the encoder states as input, we inject them into the processor in the beginning. _encoder_hidden_states_unconditional, encoder_hidden_states_conditional = self.encoder_hidden_states.chunk( 2 ) @@ -896,7 +896,7 @@ class StableDiffusionHDPainterPipeline(StableDiffusionInpaintPipeline): class GaussianSmoothing(nn.Module): """ Apply gaussian smoothing on a - 1d, 2d or 3d tensor. 
Filtering is performed seperately for each channel + 1d, 2d or 3d tensor. Filtering is performed separately for each channel in the input using a depthwise convolution. Args: diff --git a/examples/community/img2img_inpainting.py b/examples/community/img2img_inpainting.py index 001e4cc5b2..c6de027897 100644 --- a/examples/community/img2img_inpainting.py +++ b/examples/community/img2img_inpainting.py @@ -161,7 +161,7 @@ class ImageToImageInpaintingPipeline(DiffusionPipeline): `Image`, or tensor representing an image batch which will be inpainted, *i.e.* parts of the image will be masked out with `mask_image` and repainted according to `prompt`. inner_image (`torch.Tensor` or `PIL.Image.Image`): - `Image`, or tensor representing an image batch which will be overlayed onto `image`. Non-transparent + `Image`, or tensor representing an image batch which will be overlaid onto `image`. Non-transparent regions of `inner_image` must fit inside white pixels in `mask_image`. Expects four channels, with the last channel representing the alpha channel, which will be used to blend `inner_image` with `image`. If not provided, it will be forcibly cast to RGBA. diff --git a/examples/community/latent_consistency_img2img.py b/examples/community/latent_consistency_img2img.py index 6c532c7f76..01abf861b8 100644 --- a/examples/community/latent_consistency_img2img.py +++ b/examples/community/latent_consistency_img2img.py @@ -647,7 +647,7 @@ class LCMSchedulerWithTimestamp(SchedulerMixin, ConfigMixin): return sample def set_timesteps( - self, stength, num_inference_steps: int, lcm_origin_steps: int, device: Union[str, torch.device] = None + self, strength, num_inference_steps: int, lcm_origin_steps: int, device: Union[str, torch.device] = None ): """ Sets the discrete timesteps used for the diffusion chain (to be run before inference). 
@@ -668,7 +668,7 @@ class LCMSchedulerWithTimestamp(SchedulerMixin, ConfigMixin): # LCM Timesteps Setting: # Linear Spacing c = self.config.num_train_timesteps // lcm_origin_steps lcm_origin_timesteps = ( - np.asarray(list(range(1, int(lcm_origin_steps * stength) + 1))) * c - 1 + np.asarray(list(range(1, int(lcm_origin_steps * strength) + 1))) * c - 1 ) # LCM Training Steps Schedule skipping_step = len(lcm_origin_timesteps) // num_inference_steps timesteps = lcm_origin_timesteps[::-skipping_step][:num_inference_steps] # LCM Inference Steps Schedule diff --git a/examples/community/magic_mix.py b/examples/community/magic_mix.py index d3d118f84b..a29d0cfa09 100644 --- a/examples/community/magic_mix.py +++ b/examples/community/magic_mix.py @@ -129,7 +129,7 @@ class MagicMixPipeline(DiffusionPipeline): input = ( (mix_factor * latents) + (1 - mix_factor) * orig_latents - ) # interpolating between layout noise and conditionally generated noise to preserve layout sematics + ) # interpolating between layout noise and conditionally generated noise to preserve layout semantics input = torch.cat([input] * 2) else: # content generation phase diff --git a/examples/community/mixture_tiling.py b/examples/community/mixture_tiling.py index 867bce0d9e..3feed5c88d 100644 --- a/examples/community/mixture_tiling.py +++ b/examples/community/mixture_tiling.py @@ -196,9 +196,9 @@ class StableDiffusionTilingPipeline(DiffusionPipeline, StableDiffusionExtrasMixi guidance_scale_tiles: specific weights for classifier-free guidance in each tile. guidance_scale_tiles: specific weights for classifier-free guidance in each tile. If None, the value provided in guidance_scale will be used. seed_tiles: specific seeds for the initialization latents in each tile. These will override the latents generated for the whole canvas using the standard seed parameter. - seed_tiles_mode: either "full" "exclusive". If "full", all the latents affected by the tile be overriden. 
If "exclusive", only the latents that are affected exclusively by this tile (and no other tiles) will be overriden. - seed_reroll_regions: a list of tuples in the form (start row, end row, start column, end column, seed) defining regions in pixel space for which the latents will be overriden using the given seed. Takes priority over seed_tiles. - cpu_vae: the decoder from latent space to pixel space can require too mucho GPU RAM for large images. If you find out of memory errors at the end of the generation process, try setting this parameter to True to run the decoder in CPU. Slower, but should run without memory issues. + seed_tiles_mode: either "full" or "exclusive". If "full", all the latents affected by the tile will be overridden. If "exclusive", only the latents that are affected exclusively by this tile (and no other tiles) will be overridden. + seed_reroll_regions: a list of tuples in the form (start row, end row, start column, end column, seed) defining regions in pixel space for which the latents will be overridden using the given seed. Takes priority over seed_tiles. + cpu_vae: the decoder from latent space to pixel space can require too much GPU RAM for large images. If you find out of memory errors at the end of the generation process, try setting this parameter to True to run the decoder in CPU. Slower, but should run without memory issues. Examples: diff --git a/examples/community/pipeline_controlnet_xl_kolors.py b/examples/community/pipeline_controlnet_xl_kolors.py index b805c9a04a..5b0576fbcd 100644 --- a/examples/community/pipeline_controlnet_xl_kolors.py +++ b/examples/community/pipeline_controlnet_xl_kolors.py @@ -1258,7 +1258,7 @@ class KolorsControlNetPipeline( ) if guess_mode and self.do_classifier_free_guidance: - # Infered ControlNet only for the conditional batch. + # Inferred ControlNet only for the conditional batch. 
# To apply the output of ControlNet to both the unconditional and conditional batches, # add 0 to the unconditional batch to keep it unchanged. down_block_res_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_block_res_samples] diff --git a/examples/community/pipeline_controlnet_xl_kolors_img2img.py b/examples/community/pipeline_controlnet_xl_kolors_img2img.py index 5cfb98d969..44c866e826 100644 --- a/examples/community/pipeline_controlnet_xl_kolors_img2img.py +++ b/examples/community/pipeline_controlnet_xl_kolors_img2img.py @@ -1462,7 +1462,7 @@ class KolorsControlNetImg2ImgPipeline( ) if guess_mode and self.do_classifier_free_guidance: - # Infered ControlNet only for the conditional batch. + # Inferred ControlNet only for the conditional batch. # To apply the output of ControlNet to both the unconditional and conditional batches, # add 0 to the unconditional batch to keep it unchanged. down_block_res_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_block_res_samples] diff --git a/examples/community/pipeline_controlnet_xl_kolors_inpaint.py b/examples/community/pipeline_controlnet_xl_kolors_inpaint.py index 68d1153d0d..09d4b0241e 100644 --- a/examples/community/pipeline_controlnet_xl_kolors_inpaint.py +++ b/examples/community/pipeline_controlnet_xl_kolors_inpaint.py @@ -1782,7 +1782,7 @@ class KolorsControlNetInpaintPipeline( ) if guess_mode and self.do_classifier_free_guidance: - # Infered ControlNet only for the conditional batch. + # Inferred ControlNet only for the conditional batch. # To apply the output of ControlNet to both the unconditional and conditional batches, # add 0 to the unconditional batch to keep it unchanged. 
down_block_res_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_block_res_samples] diff --git a/examples/community/pipeline_fabric.py b/examples/community/pipeline_fabric.py index 30847f875b..2eddbd06ce 100644 --- a/examples/community/pipeline_fabric.py +++ b/examples/community/pipeline_fabric.py @@ -559,7 +559,7 @@ class FabricPipeline(DiffusionPipeline): End point for providing feedback (between 0 and 1). min_weight (`float`, *optional*, defaults to `.05`): Minimum weight for feedback. - max_weight (`float`, *optional*, defults tp `1.0`): + max_weight (`float`, *optional*, defaults to `1.0`): Maximum weight for feedback. neg_scale (`float`, *optional*, defaults to `.5`): Scale factor for negative feedback. diff --git a/examples/community/pipeline_faithdiff_stable_diffusion_xl.py b/examples/community/pipeline_faithdiff_stable_diffusion_xl.py index d1d3d80b4a..749f0322d0 100644 --- a/examples/community/pipeline_faithdiff_stable_diffusion_xl.py +++ b/examples/community/pipeline_faithdiff_stable_diffusion_xl.py @@ -118,7 +118,7 @@ EXAMPLE_DOC_STRING = """ >>> # Here we need use pipeline internal unet model >>> pipe.unet = pipe.unet_model.from_pretrained(model_id, subfolder="unet", variant="fp16", use_safetensors=True) >>> - >>> # Load aditional layers to the model + >>> # Load additional layers to the model >>> pipe.unet.load_additional_layers(weight_path="proc_data/faithdiff/FaithDiff.bin", dtype=dtype) >>> >>> # Enable vae tiling diff --git a/examples/community/pipeline_stable_diffusion_boxdiff.py b/examples/community/pipeline_stable_diffusion_boxdiff.py index bd58a65ce7..7c7f7e8a18 100644 --- a/examples/community/pipeline_stable_diffusion_boxdiff.py +++ b/examples/community/pipeline_stable_diffusion_boxdiff.py @@ -72,7 +72,7 @@ class GaussianSmoothing(nn.Module): """ Copied from official repo: https://github.com/showlab/BoxDiff/blob/master/utils/gaussian_smoothing.py Apply gaussian smoothing on a - 1d, 2d or 3d tensor. 
Filtering is performed seperately for each channel + 1d, 2d or 3d tensor. Filtering is performed separately for each channel in the input using a depthwise convolution. Arguments: channels (int, sequence): Number of channels of the input tensors. Output will diff --git a/examples/community/pipeline_stable_diffusion_xl_attentive_eraser.py b/examples/community/pipeline_stable_diffusion_xl_attentive_eraser.py index 8459553f4e..73f52736f4 100644 --- a/examples/community/pipeline_stable_diffusion_xl_attentive_eraser.py +++ b/examples/community/pipeline_stable_diffusion_xl_attentive_eraser.py @@ -1509,7 +1509,7 @@ class StableDiffusionXL_AE_Pipeline( add_time_ids = add_time_ids.repeat(batch_size, 1).to(DEVICE) - # interative sampling + # iterative sampling self.scheduler.set_timesteps(num_inference_steps) latents_list = [latents] pred_x0_list = [] @@ -1548,7 +1548,7 @@ class StableDiffusionXL_AE_Pipeline( x: torch.FloatTensor, ): """ - predict the sampe the next step in the denoise process. + predict the sample the next step in the denoise process. """ ref_noise = model_output[:1, :, :, :].expand(model_output.shape) alpha_prod_t = self.scheduler.alphas_cumprod[timestep] diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py index e55be92962..de5887c6de 100644 --- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py +++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py @@ -132,7 +132,7 @@ def _preprocess_adapter_image(image, height, width): image = torch.cat(image, dim=0) else: raise ValueError( - f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but recive: {image[0].ndim}" + f"Invalid image tensor! 
Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}" ) return image diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py index 791e05ebaf..c5f8ec3dfa 100644 --- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py +++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py @@ -150,7 +150,7 @@ def _preprocess_adapter_image(image, height, width): image = torch.cat(image, dim=0) else: raise ValueError( - f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but recive: {image[0].ndim}" + f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}" ) return image diff --git a/examples/community/regional_prompting_stable_diffusion.py b/examples/community/regional_prompting_stable_diffusion.py index 9f09b4bd2b..25923a6503 100644 --- a/examples/community/regional_prompting_stable_diffusion.py +++ b/examples/community/regional_prompting_stable_diffusion.py @@ -220,7 +220,7 @@ class RegionalPromptingStableDiffusionPipeline(StableDiffusionPipeline): revers = True def pcallback(s_self, step: int, timestep: int, latents: torch.Tensor, selfs=None): - if "PRO" in mode: # in Prompt mode, make masks from sum of attension maps + if "PRO" in mode: # in Prompt mode, make masks from sum of attention maps self.step = step if len(self.attnmaps_sizes) > 3: @@ -552,9 +552,9 @@ def get_attn_maps(self, attn): def reset_attnmaps(self): # init parameters in every batch self.step = 0 - self.attnmaps = {} # maked from attention maps + self.attnmaps = {} # made from attention maps self.attnmaps_sizes = [] # height,width set of u-net blocks - self.attnmasks = {} # maked from attnmaps for regions + self.attnmasks = {} # made from attnmaps for regions self.maskready = False self.history = {} diff --git a/examples/community/sde_drag.py 
b/examples/community/sde_drag.py index 902eaa99f4..3ded8c247c 100644 --- a/examples/community/sde_drag.py +++ b/examples/community/sde_drag.py @@ -97,7 +97,7 @@ class SdeDragPipeline(DiffusionPipeline): steps (`int`, *optional*, defaults to 200): The number of sampling iterations. step_size (`int`, *optional*, defaults to 2): - The drag diatance of each drag step. + The drag distance of each drag step. image_scale (`float`, *optional*, defaults to 0.3): To avoid duplicating the content, use image_scale to perturbs the source. adapt_radius (`int`, *optional*, defaults to 5): diff --git a/examples/community/unclip_image_interpolation.py b/examples/community/unclip_image_interpolation.py index 210bd61ecd..413c103cef 100644 --- a/examples/community/unclip_image_interpolation.py +++ b/examples/community/unclip_image_interpolation.py @@ -284,7 +284,7 @@ class UnCLIPImageInterpolationPipeline(DiffusionPipeline): ) else: raise AssertionError( - f"Expected 'image' or 'image_embeddings' to be not None with types List[PIL.Image] or torch.Tensor respectively. Received {type(image)} and {type(image_embeddings)} repsectively" + f"Expected 'image' or 'image_embeddings' to be not None with types List[PIL.Image] or torch.Tensor respectively. Received {type(image)} and {type(image_embeddings)} respectively" ) original_image_embeddings = self._encode_image( diff --git a/examples/consistency_distillation/train_lcm_distill_lora_sd_wds.py b/examples/consistency_distillation/train_lcm_distill_lora_sd_wds.py index 3414640f55..b254799756 100644 --- a/examples/consistency_distillation/train_lcm_distill_lora_sd_wds.py +++ b/examples/consistency_distillation/train_lcm_distill_lora_sd_wds.py @@ -1012,7 +1012,7 @@ def main(args): unet = get_peft_model(unet, lora_config) # 9. 
Handle mixed precision and device placement - # For mixed precision training we cast all non-trainable weigths to half-precision + # For mixed precision training we cast all non-trainable weights to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/consistency_distillation/train_lcm_distill_lora_sdxl.py b/examples/consistency_distillation/train_lcm_distill_lora_sdxl.py index cb8c425bcb..a332b30b28 100644 --- a/examples/consistency_distillation/train_lcm_distill_lora_sdxl.py +++ b/examples/consistency_distillation/train_lcm_distill_lora_sdxl.py @@ -829,7 +829,7 @@ def main(args): ) # 8. Handle mixed precision and device placement - # For mixed precision training we cast all non-trainable weigths to half-precision + # For mixed precision training we cast all non-trainable weights to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/consistency_distillation/train_lcm_distill_lora_sdxl_wds.py b/examples/consistency_distillation/train_lcm_distill_lora_sdxl_wds.py index d636c145ff..52d4806100 100644 --- a/examples/consistency_distillation/train_lcm_distill_lora_sdxl_wds.py +++ b/examples/consistency_distillation/train_lcm_distill_lora_sdxl_wds.py @@ -1026,7 +1026,7 @@ def main(args): unet = get_peft_model(unet, lora_config) # 9. Handle mixed precision and device placement - # For mixed precision training we cast all non-trainable weigths to half-precision + # For mixed precision training we cast all non-trainable weights to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. 
weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/consistency_distillation/train_lcm_distill_sd_wds.py b/examples/consistency_distillation/train_lcm_distill_sd_wds.py index 50a3d4ebd1..3be506352f 100644 --- a/examples/consistency_distillation/train_lcm_distill_sd_wds.py +++ b/examples/consistency_distillation/train_lcm_distill_sd_wds.py @@ -962,7 +962,7 @@ def main(args): ) # 9. Handle mixed precision and device placement - # For mixed precision training we cast all non-trainable weigths to half-precision + # For mixed precision training we cast all non-trainable weights to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/consistency_distillation/train_lcm_distill_sdxl_wds.py b/examples/consistency_distillation/train_lcm_distill_sdxl_wds.py index a719db9a89..5a28201bf7 100644 --- a/examples/consistency_distillation/train_lcm_distill_sdxl_wds.py +++ b/examples/consistency_distillation/train_lcm_distill_sdxl_wds.py @@ -1021,7 +1021,7 @@ def main(args): ) # 9. Handle mixed precision and device placement - # For mixed precision training we cast all non-trainable weigths to half-precision + # For mixed precision training we cast all non-trainable weights to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. 
weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/controlnet/README_flux.md b/examples/controlnet/README_flux.md index aa5fa25140..fcac6df110 100644 --- a/examples/controlnet/README_flux.md +++ b/examples/controlnet/README_flux.md @@ -411,7 +411,7 @@ export CAPTION_COLUMN='caption_column' export CACHE_DIR="/data/train_csr/.cache/huggingface/" export OUTPUT_DIR='/data/train_csr/FLUX/MODEL_OUT/'$MODEL_TYPE -# The first step is to use Python to precompute all caches.Replace the first line below with this line. (I am not sure why using acclerate would cause problems.) +# The first step is to use Python to precompute all caches.Replace the first line below with this line. (I am not sure why using accelerate would cause problems.) CUDA_VISIBLE_DEVICES=0 python3 train_controlnet_flux.py \ diff --git a/examples/dreambooth/README_flux.md b/examples/dreambooth/README_flux.md index c0802246e1..3a6f7905e6 100644 --- a/examples/dreambooth/README_flux.md +++ b/examples/dreambooth/README_flux.md @@ -173,13 +173,13 @@ accelerate launch train_dreambooth_lora_flux.py \ ### Target Modules When LoRA was first adapted from language models to diffusion models, it was applied to the cross-attention layers in the Unet that relate the image representations with the prompts that describe them. More recently, SOTA text-to-image diffusion models replaced the Unet with a diffusion Transformer(DiT). With this change, we may also want to explore -applying LoRA training onto different types of layers and blocks. To allow more flexibility and control over the targeted modules we added `--lora_layers`- in which you can specify in a comma seperated string +applying LoRA training onto different types of layers and blocks. To allow more flexibility and control over the targeted modules we added `--lora_layers`- in which you can specify in a comma separated string the exact modules for LoRA training. 
Here are some examples of target modules you can provide: - for attention only layers: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0"` - to train the same modules as in the fal trainer: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.net.0.proj,ff.net.2,ff_context.net.0.proj,ff_context.net.2"` - to train the same modules as in ostris ai-toolkit / replicate trainer: `--lora_blocks="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.net.0.proj,ff.net.2,ff_context.net.0.proj,ff_context.net.2,norm1_context.linear, norm1.linear,norm.linear,proj_mlp,proj_out"` > [!NOTE] -> `--lora_layers` can also be used to specify which **blocks** to apply LoRA training to. To do so, simply add a block prefix to each layer in the comma seperated string: +> `--lora_layers` can also be used to specify which **blocks** to apply LoRA training to. To do so, simply add a block prefix to each layer in the comma separated string: > **single DiT blocks**: to target the ith single transformer block, add the prefix `single_transformer_blocks.i`, e.g. - `single_transformer_blocks.i.attn.to_k` > **MMDiT blocks**: to target the ith MMDiT block, add the prefix `transformer_blocks.i`, e.g. - `transformer_blocks.i.attn.to_k` > [!NOTE] diff --git a/examples/dreambooth/README_hidream.md b/examples/dreambooth/README_hidream.md index a0e8c1feca..63b19a7f70 100644 --- a/examples/dreambooth/README_hidream.md +++ b/examples/dreambooth/README_hidream.md @@ -107,7 +107,7 @@ To better track our training experiments, we're using the following flags in the Additionally, we welcome you to explore the following CLI arguments: -* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only. 
+* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only. * `--rank`: The rank of the LoRA layers. The higher the rank, the more parameters are trained. The default is 16. We provide several options for optimizing memory optimization: diff --git a/examples/dreambooth/README_lumina2.md b/examples/dreambooth/README_lumina2.md index e466ec5a68..fe2907092c 100644 --- a/examples/dreambooth/README_lumina2.md +++ b/examples/dreambooth/README_lumina2.md @@ -113,7 +113,7 @@ To better track our training experiments, we're using the following flags in the Additionally, we welcome you to explore the following CLI arguments: -* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only. +* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only. * `--system_prompt`: A custom system prompt to provide additional personality to the model. * `--max_sequence_length`: Maximum sequence length to use for text embeddings. diff --git a/examples/dreambooth/README_sana.md b/examples/dreambooth/README_sana.md index d82529c64d..6136bfcc16 100644 --- a/examples/dreambooth/README_sana.md +++ b/examples/dreambooth/README_sana.md @@ -113,7 +113,7 @@ To better track our training experiments, we're using the following flags in the Additionally, we welcome you to explore the following CLI arguments: -* `--lora_layers`: The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only. +* `--lora_layers`: The transformer modules to apply LoRA training on. 
Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only. * `--complex_human_instruction`: Instructions for complex human attention as shown in [here](https://github.com/NVlabs/Sana/blob/main/configs/sana_app_config/Sana_1600M_app.yaml#L55). * `--max_sequence_length`: Maximum sequence length to use for text embeddings. diff --git a/examples/dreambooth/train_dreambooth_lora_flux.py b/examples/dreambooth/train_dreambooth_lora_flux.py index 193c5affe6..5341c321c3 100644 --- a/examples/dreambooth/train_dreambooth_lora_flux.py +++ b/examples/dreambooth/train_dreambooth_lora_flux.py @@ -567,7 +567,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - 'The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only' + 'The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only' ), ) diff --git a/examples/dreambooth/train_dreambooth_lora_hidream.py b/examples/dreambooth/train_dreambooth_lora_hidream.py index fbf62999d6..39de320914 100644 --- a/examples/dreambooth/train_dreambooth_lora_hidream.py +++ b/examples/dreambooth/train_dreambooth_lora_hidream.py @@ -596,7 +596,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - 'The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only' + 'The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. 
- "to_k,to_q,to_v" will result in lora training of attention layers only' ), ) diff --git a/examples/dreambooth/train_dreambooth_lora_lumina2.py b/examples/dreambooth/train_dreambooth_lora_lumina2.py index e933a80330..1e4db90d87 100644 --- a/examples/dreambooth/train_dreambooth_lora_lumina2.py +++ b/examples/dreambooth/train_dreambooth_lora_lumina2.py @@ -514,7 +514,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - 'The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only' + 'The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only' ), ) diff --git a/examples/dreambooth/train_dreambooth_lora_sana.py b/examples/dreambooth/train_dreambooth_lora_sana.py index 94effd7cba..bef6e04594 100644 --- a/examples/dreambooth/train_dreambooth_lora_sana.py +++ b/examples/dreambooth/train_dreambooth_lora_sana.py @@ -513,7 +513,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - 'The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only' + 'The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v" will result in lora training of attention layers only' ), ) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index c693038bb5..b1786260d1 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -576,7 +576,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - "The transformer block layers to apply LoRA training on. Please specify the layers in a comma seperated string." 
+ "The transformer block layers to apply LoRA training on. Please specify the layers in a comma separated string." "For examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md" ), ) @@ -585,7 +585,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - "The transformer blocks to apply LoRA training on. Please specify the block numbers in a comma seperated manner." + "The transformer blocks to apply LoRA training on. Please specify the block numbers in a comma separated manner." 'E.g. - "--lora_blocks 12,30" will result in lora training of transformer blocks 12 and 30. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' ), ) diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index fd50196170..90979ee8ff 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -664,7 +664,7 @@ def parse_args(input_args=None): action="store_true", default=False, help=( - "Wether to train a DoRA as proposed in- DoRA: Weight-Decomposed Low-Rank Adaptation https://arxiv.org/abs/2402.09353. " + "Whether to train a DoRA as proposed in- DoRA: Weight-Decomposed Low-Rank Adaptation https://arxiv.org/abs/2402.09353. " "Note: to use DoRA you need to install peft from main, `pip install git+https://github.com/huggingface/peft.git`" ), ) diff --git a/examples/flux-control/train_control_lora_flux.py b/examples/flux-control/train_control_lora_flux.py index db27f06f87..fe078f3e75 100644 --- a/examples/flux-control/train_control_lora_flux.py +++ b/examples/flux-control/train_control_lora_flux.py @@ -329,7 +329,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - 'The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. 
- "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only' + 'The transformer modules to apply LoRA training on. Please specify the layers in a comma separated. E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only' ), ) parser.add_argument( diff --git a/examples/kandinsky2_2/text_to_image/train_text_to_image_lora_decoder.py b/examples/kandinsky2_2/text_to_image/train_text_to_image_lora_decoder.py index a60bd7d586..f96a4c4f98 100644 --- a/examples/kandinsky2_2/text_to_image/train_text_to_image_lora_decoder.py +++ b/examples/kandinsky2_2/text_to_image/train_text_to_image_lora_decoder.py @@ -400,7 +400,7 @@ def main(): image_encoder.requires_grad_(False) - # For mixed precision training we cast all non-trainable weigths (vae, non-lora text_encoder and non-lora unet) to half-precision + # For mixed precision training we cast all non-trainable weights (vae, non-lora text_encoder and non-lora unet) to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/research_projects/consistency_training/train_cm_ct_unconditional.py b/examples/research_projects/consistency_training/train_cm_ct_unconditional.py index 2bea064cdb..c873356eb2 100644 --- a/examples/research_projects/consistency_training/train_cm_ct_unconditional.py +++ b/examples/research_projects/consistency_training/train_cm_ct_unconditional.py @@ -1147,7 +1147,7 @@ def main(args): tracker_config = dict(vars(args)) accelerator.init_trackers(args.tracker_project_name, config=tracker_config) - # Function for unwraping if torch.compile() was used in accelerate. + # Function for unwrapping if torch.compile() was used in accelerate. 
def unwrap_model(model): model = accelerator.unwrap_model(model) model = model._orig_mod if is_compiled_module(model) else model diff --git a/examples/research_projects/flux_lora_quantization/README.md b/examples/research_projects/flux_lora_quantization/README.md index 51005b6402..840d02fce7 100644 --- a/examples/research_projects/flux_lora_quantization/README.md +++ b/examples/research_projects/flux_lora_quantization/README.md @@ -69,7 +69,7 @@ accelerate launch --config_file=accelerate.yaml \ --seed="0" ``` -We can direcly pass a quantized checkpoint path, too: +We can directly pass a quantized checkpoint path, too: ```diff + --quantized_model_path="hf-internal-testing/flux.1-dev-nf4-pkg" diff --git a/examples/research_projects/intel_opts/inference_bf16.py b/examples/research_projects/intel_opts/inference_bf16.py index 96ec709f43..13f2731fb7 100644 --- a/examples/research_projects/intel_opts/inference_bf16.py +++ b/examples/research_projects/intel_opts/inference_bf16.py @@ -13,7 +13,7 @@ args = parser.parse_args() device = "cpu" -prompt = "a lovely in red dress and hat, in the snowly and brightly night, with many brighly buildings" +prompt = "a lovely in red dress and hat, in the snowly and brightly night, with many brightly buildings" model_id = "path-to-your-trained-model" pipe = StableDiffusionPipeline.from_pretrained(model_id) diff --git a/examples/research_projects/intel_opts/textual_inversion_dfq/README.md b/examples/research_projects/intel_opts/textual_inversion_dfq/README.md index 4a227cdb4d..184a64ec76 100644 --- a/examples/research_projects/intel_opts/textual_inversion_dfq/README.md +++ b/examples/research_projects/intel_opts/textual_inversion_dfq/README.md @@ -80,7 +80,7 @@ export INT8_MODEL_NAME="./int8_model" python text2images.py \ --pretrained_model_name_or_path=$INT8_MODEL_NAME \ - --caption "a lovely in red dress and hat, in the snowly and brightly night, with many brighly buildings." 
\ + --caption "a lovely in red dress and hat, in the snowly and brightly night, with many brightly buildings." \ --images_num 4 ``` diff --git a/examples/research_projects/pixart/pipeline_pixart_alpha_controlnet.py b/examples/research_projects/pixart/pipeline_pixart_alpha_controlnet.py index 4065a854c2..5a555c45a1 100644 --- a/examples/research_projects/pixart/pipeline_pixart_alpha_controlnet.py +++ b/examples/research_projects/pixart/pipeline_pixart_alpha_controlnet.py @@ -664,7 +664,7 @@ class PixArtAlphaControlnetPipeline(DiffusionPipeline): # & caption = re.sub(r"&", "", caption) - # ip adresses: + # ip addresses: caption = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", " ", caption) # article ids: diff --git a/examples/research_projects/pixart/train_pixart_controlnet_hf.py b/examples/research_projects/pixart/train_pixart_controlnet_hf.py index 67ec30da0e..98329c6cd4 100644 --- a/examples/research_projects/pixart/train_pixart_controlnet_hf.py +++ b/examples/research_projects/pixart/train_pixart_controlnet_hf.py @@ -612,7 +612,7 @@ def main(): # See Section 3.1. of the paper. max_length = 120 - # For mixed precision training we cast all non-trainable weigths (vae, text_encoder) to half-precision + # For mixed precision training we cast all non-trainable weights (vae, text_encoder) to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. 
weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/research_projects/pytorch_xla/inference/flux/flux_inference.py b/examples/research_projects/pytorch_xla/inference/flux/flux_inference.py index 9c98c9b5ff..35cb015a6c 100644 --- a/examples/research_projects/pytorch_xla/inference/flux/flux_inference.py +++ b/examples/research_projects/pytorch_xla/inference/flux/flux_inference.py @@ -120,11 +120,11 @@ if __name__ == "__main__": parser.add_argument("--schnell", action="store_true", help="run flux schnell instead of dev") parser.add_argument("--width", type=int, default=1024, help="width of the image to generate") parser.add_argument("--height", type=int, default=1024, help="height of the image to generate") - parser.add_argument("--guidance", type=float, default=3.5, help="gauidance strentgh for dev") + parser.add_argument("--guidance", type=float, default=3.5, help="guidance strength for dev") parser.add_argument("--seed", type=int, default=None, help="seed for inference") parser.add_argument("--profile", action="store_true", help="enable profiling") parser.add_argument("--profile-duration", type=int, default=10000, help="duration for profiling in msec.") - parser.add_argument("--itters", type=int, default=15, help="tiems to run inference and get avg time in sec.") + parser.add_argument("--itters", type=int, default=15, help="times to run inference and get avg time in sec.") args = parser.parse_args() if args.schnell: ckpt_id = "black-forest-labs/FLUX.1-schnell" diff --git a/examples/research_projects/realfill/train_realfill.py b/examples/research_projects/realfill/train_realfill.py index c7cc25df02..419636d131 100644 --- a/examples/research_projects/realfill/train_realfill.py +++ b/examples/research_projects/realfill/train_realfill.py @@ -759,7 +759,7 @@ def main(args): unet, text_encoder, optimizer, train_dataloader ) - # For mixed precision training we cast all non-trainable weigths (vae, non-lora text_encoder and non-lora unet) to half-precision + # For mixed precision training we cast all non-trainable weights (vae, non-lora text_encoder and non-lora
unet) to half-precision + # For mixed precision training we cast all non-trainable weights (vae, non-lora text_encoder and non-lora unet) to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py b/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py index 01ef67a55d..402265bde1 100644 --- a/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py @@ -661,7 +661,7 @@ def parse_args(input_args=None): action="store_true", default=False, help=( - "Wether to train a DoRA as proposed in- DoRA: Weight-Decomposed Low-Rank Adaptation https://arxiv.org/abs/2402.09353. " + "Whether to train a DoRA as proposed in- DoRA: Weight-Decomposed Low-Rank Adaptation https://arxiv.org/abs/2402.09353. " "Note: to use DoRA you need to install peft from main, `pip install git+https://github.com/huggingface/peft.git`" ), ) diff --git a/examples/textual_inversion/textual_inversion.py b/examples/textual_inversion/textual_inversion.py index 019b796011..6dcc2ff7dc 100644 --- a/examples/textual_inversion/textual_inversion.py +++ b/examples/textual_inversion/textual_inversion.py @@ -789,7 +789,7 @@ def main(): text_encoder, optimizer, train_dataloader, lr_scheduler ) - # For mixed precision training we cast all non-trainable weigths (vae, non-lora text_encoder and non-lora unet) to half-precision + # For mixed precision training we cast all non-trainable weights (vae, non-lora text_encoder and non-lora unet) to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. 
weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/examples/textual_inversion/textual_inversion_sdxl.py b/examples/textual_inversion/textual_inversion_sdxl.py index d142cccc92..ecbc7a185b 100644 --- a/examples/textual_inversion/textual_inversion_sdxl.py +++ b/examples/textual_inversion/textual_inversion_sdxl.py @@ -814,7 +814,7 @@ def main(): text_encoder_1, text_encoder_2, optimizer, train_dataloader, lr_scheduler ) - # For mixed precision training we cast all non-trainable weigths (vae, non-lora text_encoder and non-lora unet) to half-precision + # For mixed precision training we cast all non-trainable weights (vae, non-lora text_encoder and non-lora unet) to half-precision # as these weights are only used for inference, keeping weights in full precision is not required. weight_dtype = torch.float32 if accelerator.mixed_precision == "fp16": diff --git a/scripts/convert_flux_to_diffusers.py b/scripts/convert_flux_to_diffusers.py index fccac70dd8..ec31d842d4 100644 --- a/scripts/convert_flux_to_diffusers.py +++ b/scripts/convert_flux_to_diffusers.py @@ -220,7 +220,7 @@ def convert_flux_transformer_checkpoint_to_diffusers( f"double_blocks.{i}.txt_attn.proj.bias" ) - # single transfomer blocks + # single transformer blocks for i in range(num_single_layers): block_prefix = f"single_transformer_blocks.{i}." # norm.linear <- single_blocks.0.modulation.lin diff --git a/scripts/convert_sana_to_diffusers.py b/scripts/convert_sana_to_diffusers.py index 1c40072177..959a647e0a 100644 --- a/scripts/convert_sana_to_diffusers.py +++ b/scripts/convert_sana_to_diffusers.py @@ -394,7 +394,7 @@ if __name__ == "__main__": help="Scheduler type to use. 
Use 'scm' for Sana Sprint models.", ) parser.add_argument("--dump_path", default=None, type=str, required=True, help="Path to the output pipeline.") - parser.add_argument("--save_full_pipeline", action="store_true", help="save all the pipelien elemets in one.") + parser.add_argument("--save_full_pipeline", action="store_true", help="save all the pipeline elements in one.") parser.add_argument("--dtype", default="fp32", type=str, choices=["fp32", "fp16", "bf16"], help="Weight dtype.") args = parser.parse_args() diff --git a/scripts/convert_shap_e_to_diffusers.py b/scripts/convert_shap_e_to_diffusers.py index b903b4ee8a..ac6543667a 100644 --- a/scripts/convert_shap_e_to_diffusers.py +++ b/scripts/convert_shap_e_to_diffusers.py @@ -984,7 +984,7 @@ def renderer(*, args, checkpoint_map_location): return renderer_model -# prior model will expect clip_mean and clip_std, whic are missing from the state_dict +# prior model will expect clip_mean and clip_std, which are missing from the state_dict PRIOR_EXPECTED_MISSING_KEYS = ["clip_mean", "clip_std"] diff --git a/scripts/convert_wuerstchen.py b/scripts/convert_wuerstchen.py index 23d45d3dd6..826b9b2081 100644 --- a/scripts/convert_wuerstchen.py +++ b/scripts/convert_wuerstchen.py @@ -55,8 +55,8 @@ for key in orig_state_dict.keys(): state_dict[key.replace("attn.out_proj.bias", "to_out.0.bias")] = weights else: state_dict[key] = orig_state_dict[key] -deocder = WuerstchenDiffNeXt() -deocder.load_state_dict(state_dict) +decoder = WuerstchenDiffNeXt() +decoder.load_state_dict(state_dict) # Prior orig_state_dict = torch.load(os.path.join(model_path, "model_v3_stage_c.pt"), map_location=device)["ema_state_dict"] @@ -94,7 +94,7 @@ prior_pipeline = WuerstchenPriorPipeline( prior_pipeline.save_pretrained("warp-ai/wuerstchen-prior") decoder_pipeline = WuerstchenDecoderPipeline( - text_encoder=gen_text_encoder, tokenizer=gen_tokenizer, vqgan=vqmodel, decoder=deocder, scheduler=scheduler + text_encoder=gen_text_encoder, 
tokenizer=gen_tokenizer, vqgan=vqmodel, decoder=decoder, scheduler=scheduler ) decoder_pipeline.save_pretrained("warp-ai/wuerstchen") @@ -103,7 +103,7 @@ wuerstchen_pipeline = WuerstchenCombinedPipeline( # Decoder text_encoder=gen_text_encoder, tokenizer=gen_tokenizer, - decoder=deocder, + decoder=decoder, scheduler=scheduler, vqgan=vqmodel, # Prior diff --git a/src/diffusers/hooks/group_offloading.py b/src/diffusers/hooks/group_offloading.py index a2c2e2430c..7a8970aeed 100644 --- a/src/diffusers/hooks/group_offloading.py +++ b/src/diffusers/hooks/group_offloading.py @@ -243,7 +243,7 @@ class GroupOffloadingHook(ModelHook): class LazyPrefetchGroupOffloadingHook(ModelHook): r""" - A hook, used in conjuction with GroupOffloadingHook, that applies lazy prefetching to groups of torch.nn.Module. + A hook, used in conjunction with GroupOffloadingHook, that applies lazy prefetching to groups of torch.nn.Module. This hook is used to determine the order in which the layers are executed during the forward pass. Once the layer invocation order is known, assignments of the next_group attribute for prefetching can be made, which allows prefetching groups in the correct order. diff --git a/src/diffusers/hooks/layerwise_casting.py b/src/diffusers/hooks/layerwise_casting.py index 6f2cfdc348..c0105ab934 100644 --- a/src/diffusers/hooks/layerwise_casting.py +++ b/src/diffusers/hooks/layerwise_casting.py @@ -90,7 +90,7 @@ class PeftInputAutocastDisableHook(ModelHook): that the inputs are casted to the computation dtype correctly always. However, there are two goals we are hoping to achieve: 1. Making forward implementations independent of device/dtype casting operations as much as possible. - 2. Peforming inference without losing information from casting to different precisions. With the current + 2. Performing inference without losing information from casting to different precisions. 
With the current PEFT implementation (as linked in the reference above), and assuming running layerwise casting inference with storage_dtype=torch.float8_e4m3fn and compute_dtype=torch.bfloat16, inputs are cast to torch.float8_e4m3fn in the lora layer. We will then upcast back to torch.bfloat16 when we continue the diff --git a/src/diffusers/loaders/lora_conversion_utils.py b/src/diffusers/loaders/lora_conversion_utils.py index d0c9611735..a9e154af3c 100644 --- a/src/diffusers/loaders/lora_conversion_utils.py +++ b/src/diffusers/loaders/lora_conversion_utils.py @@ -819,7 +819,7 @@ def _convert_kohya_flux_lora_to_diffusers(state_dict): if zero_status_pe: logger.info( "The `position_embedding` LoRA params are all zeros which make them ineffective. " - "So, we will purge them out of the curret state dict to make loading possible." + "So, we will purge them out of the current state dict to make loading possible." ) else: @@ -835,7 +835,7 @@ def _convert_kohya_flux_lora_to_diffusers(state_dict): if zero_status_t5: logger.info( "The `t5xxl` LoRA params are all zeros which make them ineffective. " - "So, we will purge them out of the curret state dict to make loading possible." + "So, we will purge them out of the current state dict to make loading possible." ) else: logger.info( @@ -850,7 +850,7 @@ def _convert_kohya_flux_lora_to_diffusers(state_dict): if zero_status_diff_b: logger.info( "The `diff_b` LoRA params are all zeros which make them ineffective. " - "So, we will purge them out of the curret state dict to make loading possible." + "So, we will purge them out of the current state dict to make loading possible." ) else: logger.info( @@ -866,7 +866,7 @@ def _convert_kohya_flux_lora_to_diffusers(state_dict): if zero_status_diff: logger.info( "The `diff` LoRA params are all zeros which make them ineffective. " - "So, we will purge them out of the curret state dict to make loading possible." 
+ "So, we will purge them out of the current state dict to make loading possible." ) else: logger.info( @@ -1237,7 +1237,7 @@ def _convert_bfl_flux_control_lora_to_diffusers(original_state_dict): f"double_blocks.{i}.txt_attn.norm.key_norm.scale" ) - # single transfomer blocks + # single transformer blocks for i in range(num_single_layers): block_prefix = f"single_transformer_blocks.{i}." diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index 50a99cee1d..1a6768e70d 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -2413,7 +2413,7 @@ class FluxLoraLoaderMixin(LoraBaseMixin): ) -> bool: """ Control LoRA expands the shape of the input layer from (3072, 64) to (3072, 128). This method handles that and - generalizes things a bit so that any parameter that needs expansion receives appropriate treatement. + generalizes things a bit so that any parameter that needs expansion receives appropriate treatment. """ state_dict = {} if lora_state_dict is not None: diff --git a/src/diffusers/loaders/peft.py b/src/diffusers/loaders/peft.py index 50450ab7d8..bbef5b1628 100644 --- a/src/diffusers/loaders/peft.py +++ b/src/diffusers/loaders/peft.py @@ -330,7 +330,7 @@ class PeftAdapterMixin: new_sd[k] = v return new_sd - # To handle scenarios where we cannot successfully set state dict. If it's unsucessful, + # To handle scenarios where we cannot successfully set state dict. If it's unsuccessful, # we should also delete the `peft_config` associated to the `adapter_name`. 
try: if hotswap: @@ -344,7 +344,7 @@ class PeftAdapterMixin: config=lora_config, ) except Exception as e: - logger.error(f"Hotswapping {adapter_name} was unsucessful with the following error: \n{e}") + logger.error(f"Hotswapping {adapter_name} was unsuccessful with the following error: \n{e}") raise # the hotswap function raises if there are incompatible keys, so if we reach this point we can set # it to None @@ -379,7 +379,7 @@ class PeftAdapterMixin: module.delete_adapter(adapter_name) self.peft_config.pop(adapter_name) - logger.error(f"Loading {adapter_name} was unsucessful with the following error: \n{e}") + logger.error(f"Loading {adapter_name} was unsuccessful with the following error: \n{e}") raise warn_msg = "" @@ -712,7 +712,7 @@ class PeftAdapterMixin: if self.lora_scale != 1.0: module.scale_layer(self.lora_scale) - # For BC with prevous PEFT versions, we need to check the signature + # For BC with previous PEFT versions, we need to check the signature # of the `merge` method to see if it supports the `adapter_names` argument. supported_merge_kwargs = list(inspect.signature(module.merge).parameters) if "adapter_names" in supported_merge_kwargs: diff --git a/src/diffusers/loaders/single_file.py b/src/diffusers/loaders/single_file.py index c2843fc740..c15f828735 100644 --- a/src/diffusers/loaders/single_file.py +++ b/src/diffusers/loaders/single_file.py @@ -453,7 +453,7 @@ class FromSingleFileMixin: logger.warning( "Detected legacy `from_single_file` loading behavior. Attempting to create the pipeline based on inferred components.\n" "This may lead to errors if the model components are not correctly inferred. \n" - "To avoid this warning, please explicity pass the `config` argument to `from_single_file` with a path to a local diffusers model repo \n" + "To avoid this warning, please explicitly pass the `config` argument to `from_single_file` with a path to a local diffusers model repo \n" "e.g. 
`from_single_file(, config=) \n" "or run `from_single_file` with `local_files_only=False` first to update the local cache directory with " "the necessary config files.\n" diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index b55b1b5520..3a2855df2d 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -2278,7 +2278,7 @@ def convert_flux_transformer_checkpoint_to_diffusers(checkpoint, **kwargs): f"double_blocks.{i}.txt_attn.proj.bias" ) - # single transfomer blocks + # single transformer blocks for i in range(num_single_layers): block_prefix = f"single_transformer_blocks.{i}." # norm.linear <- single_blocks.0.modulation.lin @@ -2872,7 +2872,7 @@ def convert_auraflow_transformer_checkpoint_to_diffusers(checkpoint, **kwargs): def convert_lumina2_to_diffusers(checkpoint, **kwargs): converted_state_dict = {} - # Original Lumina-Image-2 has an extra norm paramter that is unused + # Original Lumina-Image-2 has an extra norm parameter that is unused # We just remove it here checkpoint.pop("norm_final.weight", None) diff --git a/src/diffusers/loaders/transformer_sd3.py b/src/diffusers/loaders/transformer_sd3.py index ece17e6728..4715372f3d 100644 --- a/src/diffusers/loaders/transformer_sd3.py +++ b/src/diffusers/loaders/transformer_sd3.py @@ -123,7 +123,7 @@ class SD3Transformer2DLoadersMixin: key = key.replace(f"layers.{idx}.2.1", f"layers.{idx}.adaln_proj") updated_state_dict[key] = value - # Image projetion parameters + # Image projection parameters embed_dim = updated_state_dict["proj_in.weight"].shape[1] output_dim = updated_state_dict["proj_out.weight"].shape[0] hidden_dim = updated_state_dict["proj_in.weight"].shape[0] diff --git a/src/diffusers/models/controlnets/controlnet_xs.py b/src/diffusers/models/controlnets/controlnet_xs.py index 608be6b702..9248f934bc 100644 --- a/src/diffusers/models/controlnets/controlnet_xs.py +++ 
b/src/diffusers/models/controlnets/controlnet_xs.py @@ -734,17 +734,17 @@ class UNetControlNetXSModel(ModelMixin, ConfigMixin): unet (`UNet2DConditionModel`): The UNet model we want to control. controlnet (`ControlNetXSAdapter`): - The ConntrolNet-XS adapter with which the UNet will be fused. If none is given, a new ConntrolNet-XS + The ControlNet-XS adapter with which the UNet will be fused. If none is given, a new ControlNet-XS adapter will be created. size_ratio (float, *optional*, defaults to `None`): - Used to contruct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details. + Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details. ctrl_block_out_channels (`List[int]`, *optional*, defaults to `None`): - Used to contruct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details, + Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details, where this parameter is called `block_out_channels`. time_embedding_mix (`float`, *optional*, defaults to None): - Used to contruct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details. + Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details. ctrl_optional_kwargs (`Dict`, *optional*, defaults to `None`): - Passed to the `init` of the new controlent if no controlent was given. + Passed to the `init` of the new controlnet if no controlnet was given. """ if controlnet is None: controlnet = ControlNetXSAdapter.from_unet( diff --git a/src/diffusers/models/embeddings.py b/src/diffusers/models/embeddings.py index b1e14ca6a7..0e1144f601 100644 --- a/src/diffusers/models/embeddings.py +++ b/src/diffusers/models/embeddings.py @@ -97,7 +97,7 @@ def get_3d_sincos_pos_embed( The spatial dimension of positional embeddings. If an integer is provided, the same size is applied to both spatial dimensions (height and width). 
temporal_size (`int`): - The temporal dimension of postional embeddings (number of frames). + The temporal dimension of positional embeddings (number of frames). spatial_interpolation_scale (`float`, defaults to 1.0): Scale factor for spatial grid interpolation. temporal_interpolation_scale (`float`, defaults to 1.0): @@ -169,7 +169,7 @@ def _get_3d_sincos_pos_embed_np( The spatial dimension of positional embeddings. If an integer is provided, the same size is applied to both spatial dimensions (height and width). temporal_size (`int`): - The temporal dimension of postional embeddings (number of frames). + The temporal dimension of positional embeddings (number of frames). spatial_interpolation_scale (`float`, defaults to 1.0): Scale factor for spatial grid interpolation. temporal_interpolation_scale (`float`, defaults to 1.0): diff --git a/src/diffusers/models/transformers/latte_transformer_3d.py b/src/diffusers/models/transformers/latte_transformer_3d.py index 27fb3f51a2..4f413ea6a5 100644 --- a/src/diffusers/models/transformers/latte_transformer_3d.py +++ b/src/diffusers/models/transformers/latte_transformer_3d.py @@ -30,7 +30,7 @@ class LatteTransformer3DModel(ModelMixin, ConfigMixin, CacheMixin): _supports_gradient_checkpointing = True """ - A 3D Transformer model for video-like data, paper: https://arxiv.org/abs/2401.03048, offical code: + A 3D Transformer model for video-like data, paper: https://arxiv.org/abs/2401.03048, official code: https://github.com/Vchitect/Latte Parameters: @@ -216,7 +216,7 @@ class LatteTransformer3DModel(ModelMixin, ConfigMixin, CacheMixin): ) num_patches = height * width - hidden_states = self.pos_embed(hidden_states) # alrady add positional embeddings + hidden_states = self.pos_embed(hidden_states) # already add positional embeddings added_cond_kwargs = {"resolution": None, "aspect_ratio": None} timestep, embedded_timestep = self.adaln_single( diff --git a/src/diffusers/models/transformers/lumina_nextdit2d.py 
b/src/diffusers/models/transformers/lumina_nextdit2d.py index 320950866c..6cf19cb3c3 100644 --- a/src/diffusers/models/transformers/lumina_nextdit2d.py +++ b/src/diffusers/models/transformers/lumina_nextdit2d.py @@ -43,7 +43,7 @@ class LuminaNextDiTBlock(nn.Module): num_kv_heads (`int`): Number of attention heads in key and value features (if using GQA), or set to None for the same as query. multiple_of (`int`): The number of multiple of ffn layer. - ffn_dim_multiplier (`float`): The multipier factor of ffn layer dimension. + ffn_dim_multiplier (`float`): The multiplier factor of ffn layer dimension. norm_eps (`float`): The eps for norm layer. qk_norm (`bool`): normalization for query and key. cross_attention_dim (`int`): Cross attention embedding dimension of the input text prompt hidden_states. diff --git a/src/diffusers/models/unets/unet_i2vgen_xl.py b/src/diffusers/models/unets/unet_i2vgen_xl.py index c275e16744..58fa30e497 100644 --- a/src/diffusers/models/unets/unet_i2vgen_xl.py +++ b/src/diffusers/models/unets/unet_i2vgen_xl.py @@ -154,7 +154,7 @@ class I2VGenXLUNet(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin): # of that, we used `num_attention_heads` for arguments that actually denote attention head dimension. This # is why we ignore `num_attention_heads` and calculate it from `attention_head_dims` below. # This is still an incorrect way of calculating `num_attention_heads` but we need to stick to it - # without running proper depcrecation cycles for the {down,mid,up} blocks which are a + # without running proper deprecation cycles for the {down,mid,up} blocks which are a # part of the public API. 
num_attention_heads = attention_head_dim diff --git a/src/diffusers/pipelines/amused/pipeline_amused.py b/src/diffusers/pipelines/amused/pipeline_amused.py index 12f7dc7c59..f0948ede9b 100644 --- a/src/diffusers/pipelines/amused/pipeline_amused.py +++ b/src/diffusers/pipelines/amused/pipeline_amused.py @@ -131,7 +131,7 @@ class AmusedPipeline(DiffusionPipeline): generation deterministic. latents (`torch.IntTensor`, *optional*): Pre-generated tokens representing latent vectors in `self.vqvae`, to be used as inputs for image - gneration. If not provided, the starting latents will be completely masked. + generation. If not provided, the starting latents will be completely masked. prompt_embeds (`torch.Tensor`, *optional*): Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not provided, text embeddings are generated from the `prompt` input argument. A single vector from the diff --git a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py index f80771381b..87d78646a9 100644 --- a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py +++ b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py @@ -373,7 +373,7 @@ class AudioLDM2Pipeline(DiffusionPipeline): *e.g.* prompt weighting. If not provided, negative_prompt_embeds will be computed from `negative_prompt` input argument. generated_prompt_embeds (`torch.Tensor`, *optional*): - Pre-generated text embeddings from the GPT2 langauge model. Can be used to easily tweak text inputs, + Pre-generated text embeddings from the GPT2 language model. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input argument. negative_generated_prompt_embeds (`torch.Tensor`, *optional*): @@ -394,7 +394,7 @@ class AudioLDM2Pipeline(DiffusionPipeline): attention_mask (`torch.LongTensor`): Attention mask to be applied to the `prompt_embeds`. 
generated_prompt_embeds (`torch.Tensor`): - Text embeddings generated from the GPT2 langauge model. + Text embeddings generated from the GPT2 language model. Example: @@ -904,7 +904,7 @@ class AudioLDM2Pipeline(DiffusionPipeline): Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument. generated_prompt_embeds (`torch.Tensor`, *optional*): - Pre-generated text embeddings from the GPT2 langauge model. Can be used to easily tweak text inputs, + Pre-generated text embeddings from the GPT2 language model. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input argument. negative_generated_prompt_embeds (`torch.Tensor`, *optional*): diff --git a/src/diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py b/src/diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py index cbd8bef679..ee9615e828 100644 --- a/src/diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +++ b/src/diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py @@ -138,7 +138,7 @@ class BlipDiffusionPipeline(DiffusionPipeline): def get_query_embeddings(self, input_image, src_subject): return self.qformer(image_input=input_image, text_input=src_subject, return_dict=False) - # from the original Blip Diffusion code, speciefies the target subject and augments the prompt by repeating it + # from the original Blip Diffusion code, specifies the target subject and augments the prompt by repeating it def _build_prompt(self, prompts, tgt_subjects, prompt_strength=1.0, prompt_reps=20): rv = [] for prompt, tgt_subject in zip(prompts, tgt_subjects): diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py index 88c387d48d..c73dd9824f 100644 --- 
a/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py @@ -149,7 +149,7 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline): def get_query_embeddings(self, input_image, src_subject): return self.qformer(image_input=input_image, text_input=src_subject, return_dict=False) - # from the original Blip Diffusion code, speciefies the target subject and augments the prompt by repeating it + # from the original Blip Diffusion code, specifies the target subject and augments the prompt by repeating it def _build_prompt(self, prompts, tgt_subjects, prompt_strength=1.0, prompt_reps=20): rv = [] for prompt, tgt_subject in zip(prompts, tgt_subjects): diff --git a/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py b/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py index 901ca25c57..8792961e31 100644 --- a/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +++ b/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py @@ -739,7 +739,7 @@ class StableDiffusionControlNetXSPipeline( callback_on_step_end_tensor_inputs (`List`, *optional*): The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the - `._callback_tensor_inputs` attribute of your pipeine class. + `._callback_tensor_inputs` attribute of your pipeline class. 
Examples: Returns: diff --git a/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py b/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py index acf1f5489e..1d36038d3a 100644 --- a/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +++ b/src/diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py @@ -880,7 +880,7 @@ class StableDiffusionXLControlNetXSPipeline( callback_on_step_end_tensor_inputs (`List`, *optional*): The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the - `._callback_tensor_inputs` attribute of your pipeine class. + `._callback_tensor_inputs` attribute of your pipeline class. Examples: diff --git a/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py b/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py index 34b2a39455..b33c3735c2 100644 --- a/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +++ b/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py @@ -97,7 +97,7 @@ class DanceDiffusionPipeline(DiffusionPipeline): for i, audio in enumerate(audios): write(f"maestro_test_{i}.wav", pipe.unet.sample_rate, audio.transpose()) - # To dislay in google colab + # To display in google colab import IPython.display as ipd for audio in audios: diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py index 7225f2f234..d0e3d208f9 100644 --- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py @@ -509,7 +509,8 @@ class StableDiffusionModelEditingPipeline( The 
destination prompt. Must contain all words from `source_prompt` with additional ones to specify the target edit. lamb (`float`, *optional*, defaults to 0.1): - The lambda parameter specifying the regularization intesity. Smaller values increase the editing power. + The lambda parameter specifying the regularization intensity. Smaller values increase the editing + power. restart_params (`bool`, *optional*, defaults to True): Restart the model parameters to their pre-trained version before editing. This is done to avoid edit compounding. When it is `False`, edits accumulate. diff --git a/src/diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py b/src/diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py index bc276811ff..7dd8182dfe 100644 --- a/src/diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +++ b/src/diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py @@ -1097,7 +1097,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin): cross_attention_kwargs (`dict`, *optional*): A kwargs dictionary that if specified is passed along to the [`AttnProcessor`]. added_cond_kwargs: (`dict`, *optional*): - A kwargs dictionary containin additional embeddings that if specified are added to the embeddings that + A kwargs dictionary containing additional embeddings that if specified are added to the embeddings that are passed along to the UNet blocks. down_block_additional_residuals (`tuple` of `torch.Tensor`, *optional*): additional residuals to be added to UNet long skip connections from down blocks to up blocks for diff --git a/src/diffusers/pipelines/free_noise_utils.py b/src/diffusers/pipelines/free_noise_utils.py index 8ea5eb7dd5..4a65008183 100644 --- a/src/diffusers/pipelines/free_noise_utils.py +++ b/src/diffusers/pipelines/free_noise_utils.py @@ -478,7 +478,7 @@ class AnimateDiffFreeNoiseMixin: Must be one of ["shuffle_context", "repeat_context", "random"]. 
- "shuffle_context" Shuffles a fixed batch of `context_length` latents to create a final latent of size - `num_frames`. This is usually the best setting for most generation scenarious. However, there + `num_frames`. This is usually the best setting for most generation scenarios. However, there might be visible repetition noticeable in the kinds of motion/animation generated. - "repeated_context" Repeats a fixed batch of `context_length` latents to create a final latent of size diff --git a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py index 58d65a190d..a00b16d000 100644 --- a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +++ b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py @@ -462,7 +462,7 @@ class I2VGenXLPipeline( image_latents = image_latents.unsqueeze(2) # Append a position mask for each subsequent frame - # after the intial image latent frame + # after the initial image latent frame frame_position_mask = [] for frame_idx in range(num_frames - 1): scale = (frame_idx + 1) / (num_frames - 1) diff --git a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py index 769c834ec3..a838f5618f 100644 --- a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +++ b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py @@ -496,7 +496,7 @@ class KandinskyInpaintPipeline(DiffusionPipeline): "As of diffusers==0.19.0 this behavior has been inverted. Now white pixels are repainted and black pixels are preserved. " "This way, Kandinsky's masking behavior is aligned with Stable Diffusion. " "THIS means that you HAVE to invert the input mask to have the same behavior as before as explained in https://github.com/huggingface/diffusers/pull/4207. 
" - "This warning will be surpressed after the first inference call and will be removed in diffusers>0.23.0" + "This warning will be suppressed after the first inference call and will be removed in diffusers>0.23.0" ) self._warn_has_been_called = True diff --git a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py index 482093a4bb..e99aa918ff 100644 --- a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +++ b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py @@ -386,7 +386,7 @@ class KandinskyV22InpaintPipeline(DiffusionPipeline): "As of diffusers==0.19.0 this behavior has been inverted. Now white pixels are repainted and black pixels are preserved. " "This way, Kandinsky's masking behavior is aligned with Stable Diffusion. " "THIS means that you HAVE to invert the input mask to have the same behavior as before as explained in https://github.com/huggingface/diffusers/pull/4207. " - "This warning will be surpressed after the first inference call and will be removed in diffusers>0.23.0" + "This warning will be suppressed after the first inference call and will be removed in diffusers>0.23.0" ) self._warn_has_been_called = True diff --git a/src/diffusers/pipelines/kolors/text_encoder.py b/src/diffusers/pipelines/kolors/text_encoder.py index 757569c880..7fd1a2ec0e 100644 --- a/src/diffusers/pipelines/kolors/text_encoder.py +++ b/src/diffusers/pipelines/kolors/text_encoder.py @@ -668,7 +668,7 @@ class Embedding(torch.nn.Module): # Embeddings. words_embeddings = self.word_embeddings(input_ids) embeddings = words_embeddings - # Data format change to avoid explicit tranposes : [b s h] --> [s b h]. + # Data format change to avoid explicit transposes : [b s h] --> [s b h]. embeddings = embeddings.transpose(0, 1).contiguous() # If the input flag for fp32 residual connection is set, convert for float. 
if self.fp32_residual_connection: diff --git a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py index bdac47c47a..37fe35278c 100644 --- a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +++ b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py @@ -1458,7 +1458,7 @@ def compute_noise_ddim(scheduler, prev_latents, latents, timestep, noise_pred, e # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * noise_pred - # modifed so that updated xtm1 is returned as well (to avoid error accumulation) + # modified so that updated xtm1 is returned as well (to avoid error accumulation) mu_xt = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction if variance > 0.0: noise = (prev_latents - mu_xt) / (variance ** (0.5) * eta) diff --git a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py index cad7d8a66a..a062b5ae6d 100644 --- a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py @@ -1742,7 +1742,7 @@ def compute_noise_ddim(scheduler, prev_latents, latents, timestep, noise_pred, e # 6. 
compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * noise_pred - # modifed so that updated xtm1 is returned as well (to avoid error accumulation) + # modified so that updated xtm1 is returned as well (to avoid error accumulation) mu_xt = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction if variance > 0.0: noise = (prev_latents - mu_xt) / (variance ** (0.5) * eta) diff --git a/src/diffusers/pipelines/marigold/marigold_image_processing.py b/src/diffusers/pipelines/marigold/marigold_image_processing.py index 0723014ad3..5130a87660 100644 --- a/src/diffusers/pipelines/marigold/marigold_image_processing.py +++ b/src/diffusers/pipelines/marigold/marigold_image_processing.py @@ -426,7 +426,7 @@ class MarigoldImageProcessor(ConfigMixin): if isinstance(img, np.ndarray): img = torch.from_numpy(img) if not torch.is_floating_point(img): - raise ValueError(f"{prefix}: unexected dtype={img.dtype}.") + raise ValueError(f"{prefix}: unexpected dtype={img.dtype}.") else: raise ValueError(f"{prefix}: unexpected type={type(img)}.") if val_min != 0.0 or val_max != 1.0: @@ -464,7 +464,7 @@ class MarigoldImageProcessor(ConfigMixin): if torch.is_tensor(img): img = img.cpu().numpy() if not np.issubdtype(img.dtype, np.floating): - raise ValueError(f"{prefix}: unexected dtype={img.dtype}.") + raise ValueError(f"{prefix}: unexpected dtype={img.dtype}.") if val_min != 0.0 or val_max != 1.0: img = (img - val_min) / (val_max - val_min) img = (img * (2**16 - 1)).astype(np.uint16) diff --git a/src/diffusers/pipelines/omnigen/pipeline_omnigen.py b/src/diffusers/pipelines/omnigen/pipeline_omnigen.py index 5fe5be3b26..eb564b841e 100644 --- a/src/diffusers/pipelines/omnigen/pipeline_omnigen.py +++ b/src/diffusers/pipelines/omnigen/pipeline_omnigen.py @@ -176,7 +176,7 @@ class OmniGenPipeline( get the continue embedding of input images by VAE Args: - 
input_pixel_values: normlized pixel of input images + input_pixel_values: normalized pixel of input images device: Returns: torch.Tensor """ diff --git a/src/diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py index b84f5d5559..71245a75e2 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py @@ -115,7 +115,7 @@ EXAMPLE_DOC_STRING = """ ... with torch.no_grad(), torch.autocast("cuda"): ... depth_map = depth_estimator(image).predicted_depth - ... depth_map = torch.nn.fuctional.interpolate( + ... depth_map = torch.nn.functional.interpolate( ... depth_map.unsqueeze(1), ... size=(1024, 1024), ... mode="bicubic", diff --git a/src/diffusers/pipelines/shap_e/renderer.py b/src/diffusers/pipelines/shap_e/renderer.py index dd25945590..00f873115f 100644 --- a/src/diffusers/pipelines/shap_e/renderer.py +++ b/src/diffusers/pipelines/shap_e/renderer.py @@ -1038,7 +1038,7 @@ class ShapERenderer(ModelMixin, ConfigMixin): textures = _convert_srgb_to_linear(textures) textures = textures.float() - # 3.3 augument the mesh with texture data + # 3.3 augment the mesh with texture data assert len(textures.shape) == 3 and textures.shape[-1] == len(texture_channels), ( f"expected [meta_batch x inner_batch x texture_channels] field results, but got {textures.shape}" ) diff --git a/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py b/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py index 38f1c4314e..fce8efdd3c 100644 --- a/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +++ b/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py @@ -524,9 +524,9 @@ class StableCascadeDecoderPipeline(DiffusionPipeline): latents = self.vqgan.config.scale_factor * latents images = self.vqgan.decode(latents).sample.clamp(0, 1) if output_type == "np": - images = 
images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesnt work + images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesn't work elif output_type == "pil": - images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesnt work + images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesn't work images = self.numpy_to_pil(images) else: images = latents diff --git a/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py b/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py index 241c454e10..f08e38e7ce 100644 --- a/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +++ b/src/diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py @@ -626,11 +626,11 @@ class StableCascadePriorPipeline(DiffusionPipeline): self.maybe_free_model_hooks() if output_type == "np": - latents = latents.cpu().float().numpy() # float() as bfloat16-> numpy doesnt work - prompt_embeds = prompt_embeds.cpu().float().numpy() # float() as bfloat16-> numpy doesnt work + latents = latents.cpu().float().numpy() # float() as bfloat16-> numpy doesn't work + prompt_embeds = prompt_embeds.cpu().float().numpy() # float() as bfloat16-> numpy doesn't work negative_prompt_embeds = ( negative_prompt_embeds.cpu().float().numpy() if negative_prompt_embeds is not None else None - ) # float() as bfloat16-> numpy doesnt work + ) # float() as bfloat16-> numpy doesn't work if not return_dict: return ( diff --git a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py index 351b146fb4..2c972284a1 100644 --- a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +++ 
b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py @@ -1047,7 +1047,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM class GaussianSmoothing(torch.nn.Module): """ Arguments: - Apply gaussian smoothing on a 1d, 2d or 3d tensor. Filtering is performed seperately for each channel in the input + Apply gaussian smoothing on a 1d, 2d or 3d tensor. Filtering is performed separately for each channel in the input using a depthwise convolution. channels (int, sequence): Number of channels of the input tensors. Output will have this number of channels as well. diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py index 1f29f577f8..1ca1fd2ded 100755 --- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py @@ -123,7 +123,7 @@ class StableDiffusionKDiffusionPipeline( super().__init__() logger.info( - f"{self.__class__} is an experimntal pipeline and is likely to change in the future. We recommend to use" + f"{self.__class__} is an experimental pipeline and is likely to change in the future. We recommend to use" " this pipeline for fast experimentation / iteration if needed, but advice to rely on existing pipelines" " as defined in https://huggingface.co/docs/diffusers/api/schedulers#implemented-schedulers for" " production settings." 
diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py index 6cd0e415e1..85b157d8ef 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py @@ -123,7 +123,7 @@ def _preprocess_adapter_image(image, height, width): image = torch.cat(image, dim=0) else: raise ValueError( - f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but recive: {image[0].ndim}" + f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}" ) return image diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index 5eacb64d01..d5382517ca 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -121,7 +121,7 @@ def _preprocess_adapter_image(image, height, width): image = torch.cat(image, dim=0) else: raise ValueError( - f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but recive: {image[0].ndim}" + f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}" ) return image diff --git a/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py b/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py index 75e5d43678..29f99f3fc7 100644 --- a/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py +++ b/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py @@ -140,7 +140,7 @@ class UniDiffuserTextDecoder(ModelMixin, ConfigMixin, ModuleUtilsMixin): input_ids (`torch.Tensor` of shape `(N, max_seq_len)`): Text tokens to use for inference. 
prefix_embeds (`torch.Tensor` of shape `(N, prefix_length, 768)`): - Prefix embedding to preprend to the embedded tokens. + Prefix embedding to prepend to the embedded tokens. attention_mask (`torch.Tensor` of shape `(N, prefix_length + max_seq_len, 768)`, *optional*): Attention mask for the prefix embedding. labels (`torch.Tensor`, *optional*): diff --git a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py index 66d7404fb9..865dba75b7 100644 --- a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +++ b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py @@ -803,7 +803,7 @@ class UniDiffuserPipeline(DiffusionPipeline): def _combine(self, img_vae, img_clip): r""" - Combines a latent iamge img_vae of shape (B, C, H, W) and a CLIP-embedded image img_clip of shape (B, 1, + Combines a latent image img_vae of shape (B, C, H, W) and a CLIP-embedded image img_clip of shape (B, 1, clip_img_dim) into a single tensor of shape (B, C * H * W + clip_img_dim). """ img_vae = torch.reshape(img_vae, (img_vae.shape[0], -1)) diff --git a/src/diffusers/quantizers/base.py b/src/diffusers/quantizers/base.py index fa9ba98e6d..ffa654c98c 100644 --- a/src/diffusers/quantizers/base.py +++ b/src/diffusers/quantizers/base.py @@ -199,7 +199,7 @@ class DiffusersQuantizer(ABC): def dequantize(self, model): """ - Potentially dequantize the model to retrive the original model, with some loss in accuracy / performance. Note + Potentially dequantize the model to retrieve the original model, with some loss in accuracy / performance. Note not all quantization schemes support this. 
""" model = self._dequantize(model) diff --git a/src/diffusers/quantizers/bitsandbytes/utils.py b/src/diffusers/quantizers/bitsandbytes/utils.py index 5476b93a4c..9943c1a511 100644 --- a/src/diffusers/quantizers/bitsandbytes/utils.py +++ b/src/diffusers/quantizers/bitsandbytes/utils.py @@ -49,7 +49,7 @@ def _replace_with_bnb_linear( """ Private method that wraps the recursion for module replacement. - Returns the converted model and a boolean that indicates if the conversion has been successfull or not. + Returns the converted model and a boolean that indicates if the conversion has been successful or not. """ for name, module in model.named_children(): if current_key_name is None: @@ -223,7 +223,7 @@ def _dequantize_and_replace( performance drop compared to the original model before quantization - use it only for specific usecases such as QLoRA adapters merging. - Returns the converted model and a boolean that indicates if the conversion has been successfull or not. + Returns the converted model and a boolean that indicates if the conversion has been successful or not. 
""" quant_method = quantization_config.quantization_method() diff --git a/src/diffusers/quantizers/gguf/gguf_quantizer.py b/src/diffusers/quantizers/gguf/gguf_quantizer.py index 97f03b07a3..b3e10b1c32 100644 --- a/src/diffusers/quantizers/gguf/gguf_quantizer.py +++ b/src/diffusers/quantizers/gguf/gguf_quantizer.py @@ -49,7 +49,7 @@ class GGUFQuantizer(DiffusersQuantizer): def validate_environment(self, *args, **kwargs): if not is_accelerate_available() or is_accelerate_version("<", "0.26.0"): raise ImportError( - "Loading GGUF Parameters requires `accelerate` installed in your enviroment: `pip install 'accelerate>=0.26.0'`" + "Loading GGUF Parameters requires `accelerate` installed in your environment: `pip install 'accelerate>=0.26.0'`" ) if not is_gguf_available() or is_gguf_version("<", "0.10.0"): raise ImportError( @@ -82,7 +82,7 @@ class GGUFQuantizer(DiffusersQuantizer): inferred_shape = _quant_shape_from_byte_shape(loaded_param_shape, type_size, block_size) if inferred_shape != current_param_shape: raise ValueError( - f"{param_name} has an expected quantized shape of: {inferred_shape}, but receieved shape: {loaded_param_shape}" + f"{param_name} has an expected quantized shape of: {inferred_shape}, but received shape: {loaded_param_shape}" ) return True diff --git a/src/diffusers/quantizers/torchao/torchao_quantizer.py b/src/diffusers/quantizers/torchao/torchao_quantizer.py index f9fb217ed6..def7ee33e3 100644 --- a/src/diffusers/quantizers/torchao/torchao_quantizer.py +++ b/src/diffusers/quantizers/torchao/torchao_quantizer.py @@ -262,7 +262,7 @@ class TorchAoHfQuantizer(DiffusersQuantizer): **kwargs, ): r""" - Each nn.Linear layer that needs to be quantized is processsed here. First, we set the value the weight tensor, + Each nn.Linear layer that needs to be quantized is processed here. First, we set the value the weight tensor, then we move it to the target device. Finally, we quantize the module. 
""" module, tensor_name = get_module_from_name(model, param_name) diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py b/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py index daae50627d..dd28af3607 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py @@ -218,7 +218,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin): if algorithm_type not in ["dpmsolver++", "sde-dpmsolver++"] and final_sigmas_type == "zero": raise ValueError( - f"`final_sigmas_type` {final_sigmas_type} is not supported for `algorithm_type` {algorithm_type}. Please chooose `sigma_min` instead." + f"`final_sigmas_type` {final_sigmas_type} is not supported for `algorithm_type` {algorithm_type}. Please choose `sigma_min` instead." ) # setable values diff --git a/src/diffusers/utils/export_utils.py b/src/diffusers/utils/export_utils.py index 30d2c8bebd..07cf46928a 100644 --- a/src/diffusers/utils/export_utils.py +++ b/src/diffusers/utils/export_utils.py @@ -155,7 +155,7 @@ def export_to_video( bitrate: Set a constant bitrate for the video encoding. Default is None causing `quality` parameter to be used instead. Better quality videos with smaller file sizes will result from using the `quality` variable bitrate parameter - rather than specifiying a fixed bitrate with this parameter. + rather than specifying a fixed bitrate with this parameter. macro_block_size: Size constraint for video. Width and height, must be divisible by this number. 
If not divisible by this number diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index d1269fbc5f..7d0a6faa7a 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -153,19 +153,19 @@ def get_peft_kwargs(rank_dict, network_alpha_dict, peft_state_dict, is_unet=True r = lora_alpha = list(rank_dict.values())[0] if len(set(rank_dict.values())) > 1: - # get the rank occuring the most number of times + # get the rank occurring the most number of times r = collections.Counter(rank_dict.values()).most_common()[0][0] - # for modules with rank different from the most occuring rank, add it to the `rank_pattern` + # for modules with rank different from the most occurring rank, add it to the `rank_pattern` rank_pattern = dict(filter(lambda x: x[1] != r, rank_dict.items())) rank_pattern = {k.split(".lora_B.")[0]: v for k, v in rank_pattern.items()} if network_alpha_dict is not None and len(network_alpha_dict) > 0: if len(set(network_alpha_dict.values())) > 1: - # get the alpha occuring the most number of times + # get the alpha occurring the most number of times lora_alpha = collections.Counter(network_alpha_dict.values()).most_common()[0][0] - # for modules with alpha different from the most occuring alpha, add it to the `alpha_pattern` + # for modules with alpha different from the most occurring alpha, add it to the `alpha_pattern` alpha_pattern = dict(filter(lambda x: x[1] != lora_alpha, network_alpha_dict.items())) if is_unet: alpha_pattern = { diff --git a/src/diffusers/utils/state_dict_utils.py b/src/diffusers/utils/state_dict_utils.py index 3682c5bfac..15a91040c4 100644 --- a/src/diffusers/utils/state_dict_utils.py +++ b/src/diffusers/utils/state_dict_utils.py @@ -219,7 +219,7 @@ def convert_state_dict_to_diffusers(state_dict, original_type=None, **kwargs): kwargs (`dict`, *args*): Additional arguments to pass to the method. 
- - **adapter_name**: For example, in case of PEFT, some keys will be pre-pended + - **adapter_name**: For example, in case of PEFT, some keys will be prepended with the adapter name, therefore needs a special handling. By default PEFT also takes care of that in `get_peft_model_state_dict` method: https://github.com/huggingface/peft/blob/ba0477f2985b1ba311b83459d29895c809404e99/src/peft/utils/save_and_load.py#L92 @@ -290,7 +290,7 @@ def convert_state_dict_to_kohya(state_dict, original_type=None, **kwargs): kwargs (`dict`, *args*): Additional arguments to pass to the method. - - **adapter_name**: For example, in case of PEFT, some keys will be pre-pended + - **adapter_name**: For example, in case of PEFT, some keys will be prepended with the adapter name, therefore needs a special handling. By default PEFT also takes care of that in `get_peft_model_state_dict` method: https://github.com/huggingface/peft/blob/ba0477f2985b1ba311b83459d29895c809404e99/src/peft/utils/save_and_load.py#L92 diff --git a/src/diffusers/utils/torch_utils.py b/src/diffusers/utils/torch_utils.py index a5df07e4a3..19c076a4a6 100644 --- a/src/diffusers/utils/torch_utils.py +++ b/src/diffusers/utils/torch_utils.py @@ -61,7 +61,7 @@ def randn_tensor( logger.info( f"The passed generator was created on 'cpu' even though a tensor on {device} was expected." f" Tensors will be created on 'cpu' and then moved to {device}. Note that one can probably" - f" slighly speed up this function by passing a generator that was created on the {device} device." + f" slightly speed up this function by passing a generator that was created on the {device} device." 
) elif gen_device_type != device.type and gen_device_type == "cuda": raise ValueError(f"Cannot generate a {device} tensor from a generator of type {gen_device_type}.") diff --git a/src/diffusers/video_processor.py b/src/diffusers/video_processor.py index 2da782b463..5d0fdde8b4 100644 --- a/src/diffusers/video_processor.py +++ b/src/diffusers/video_processor.py @@ -67,7 +67,7 @@ class VideoProcessor(VaeImageProcessor): # ensure the input is a list of videos: # - if it is a batch of videos (5d torch.Tensor or np.ndarray), it is converted to a list of videos (a list of 4d torch.Tensor or np.ndarray) - # - if it is a single video, it is convereted to a list of one video. + # - if it is a single video, it is converted to a list of one video. if isinstance(video, (np.ndarray, torch.Tensor)) and video.ndim == 5: video = list(video) elif isinstance(video, list) and is_valid_image(video[0]) or is_valid_image_imagelist(video): diff --git a/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py b/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py index 97d1a1cc38..b566e894b8 100644 --- a/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py +++ b/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py @@ -187,6 +187,6 @@ class WuerstchenDecoderPipelineFastTests(PipelineTesterMixin, unittest.TestCase) def test_float16_inference(self): super().test_float16_inference() - @unittest.skip("Test not supoorted.") + @unittest.skip("Test not supported.") def test_encode_prompt_works_in_isolation(self): super().test_encode_prompt_works_in_isolation()