diff --git a/examples/dreambooth/README_flux2.md b/examples/dreambooth/README_flux2.md index ad5d61f1f9e2..3839e377c0b3 100644 --- a/examples/dreambooth/README_flux2.md +++ b/examples/dreambooth/README_flux2.md @@ -347,16 +347,17 @@ When LoRA was first adapted from language models to diffusion models, it was app More recently, SOTA text-to-image diffusion models replaced the Unet with a diffusion Transformer(DiT). With this change, we may also want to explore applying LoRA training onto different types of layers and blocks. To allow more flexibility and control over the targeted modules we added `--lora_layers`- in which you can specify in a comma separated string the exact modules for LoRA training. Here are some examples of target modules you can provide: -- for attention only layers: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0"` -- to train the same modules as in the fal trainer: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.net.0.proj,ff.net.2,ff_context.net.0.proj,ff_context.net.2"` -- to train the same modules as in ostris ai-toolkit / replicate trainer: `--lora_blocks="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.net.0.proj,ff.net.2,ff_context.net.0.proj,ff_context.net.2,norm1_context.linear, norm1.linear,norm.linear,proj_mlp,proj_out"` +- for attention only layers: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.to_qkv_mlp_proj"` +- to train the same modules as in the fal trainer: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.to_qkv_mlp_proj,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.linear_in,ff.linear_out,ff_context.linear_in,ff_context.linear_out"` +- to train the same modules as in ostris ai-toolkit / replicate trainer: 
`--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.to_qkv_mlp_proj,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.linear_in,ff.linear_out,ff_context.linear_in,ff_context.linear_out,norm_out.linear,norm_out.proj_out"` > [!NOTE] > `--lora_layers` can also be used to specify which **blocks** to apply LoRA training to. To do so, simply add a block prefix to each layer in the comma separated string: > **single DiT blocks**: to target the ith single transformer block, add the prefix `single_transformer_blocks.i`, e.g. - `single_transformer_blocks.i.attn.to_k` -> **MMDiT blocks**: to target the ith MMDiT block, add the prefix `transformer_blocks.i`, e.g. - `transformer_blocks.i.attn.to_k` +> **MMDiT blocks**: to target the ith MMDiT block, add the prefix `transformer_blocks.i`, e.g. - `transformer_blocks.i.attn.to_k` > [!NOTE] > keep in mind that while training more layers can improve quality and expressiveness, it also increases the size of the output LoRA weights. - +> [!NOTE] +> In FLUX2, the q, k, and v projections are fused into a single linear layer named `attn.to_qkv_mlp_proj` within the single transformer block. Also, the attention output is just `attn.to_out`, not `attn.to_out.0` — it is no longer a ModuleList like in the transformer blocks. 
## Training Image-to-Image diff --git a/examples/dreambooth/train_dreambooth_lora_flux2.py b/examples/dreambooth/train_dreambooth_lora_flux2.py index 317ed2c2b2e1..d95ed5e10cea 100644 --- a/examples/dreambooth/train_dreambooth_lora_flux2.py +++ b/examples/dreambooth/train_dreambooth_lora_flux2.py @@ -1256,7 +1256,13 @@ def main(args): if args.lora_layers is not None: target_modules = [layer.strip() for layer in args.lora_layers.split(",")] else: - target_modules = ["to_k", "to_q", "to_v", "to_out.0"] + # target_modules = ["to_k", "to_q", "to_v", "to_out.0"] # just train transformer_blocks + + # train transformer_blocks and single_transformer_blocks + target_modules = ["to_k", "to_q", "to_v", "to_out.0"] + [ + "to_qkv_mlp_proj", + *[f"single_transformer_blocks.{i}.attn.to_out" for i in range(48)], + ] # now we will add new LoRA weights the transformer layers transformer_lora_config = LoraConfig( diff --git a/examples/dreambooth/train_dreambooth_lora_flux2_img2img.py b/examples/dreambooth/train_dreambooth_lora_flux2_img2img.py index 16a3863c881d..144410395ee8 100644 --- a/examples/dreambooth/train_dreambooth_lora_flux2_img2img.py +++ b/examples/dreambooth/train_dreambooth_lora_flux2_img2img.py @@ -1206,7 +1206,13 @@ def main(args): if args.lora_layers is not None: target_modules = [layer.strip() for layer in args.lora_layers.split(",")] else: - target_modules = ["to_k", "to_q", "to_v", "to_out.0"] + # target_modules = ["to_k", "to_q", "to_v", "to_out.0"] # just train transformer_blocks + + # train transformer_blocks and single_transformer_blocks + target_modules = ["to_k", "to_q", "to_v", "to_out.0"] + [ + "to_qkv_mlp_proj", + *[f"single_transformer_blocks.{i}.attn.to_out" for i in range(48)], + ] # now we will add new LoRA weights the transformer layers transformer_lora_config = LoraConfig( diff --git a/examples/dreambooth/train_dreambooth_lora_flux2_klein.py b/examples/dreambooth/train_dreambooth_lora_flux2_klein.py index 278c25900a3a..7d1adc829712 100644 --- 
a/examples/dreambooth/train_dreambooth_lora_flux2_klein.py +++ b/examples/dreambooth/train_dreambooth_lora_flux2_klein.py @@ -1249,7 +1249,13 @@ def main(args): if args.lora_layers is not None: target_modules = [layer.strip() for layer in args.lora_layers.split(",")] else: - target_modules = ["to_k", "to_q", "to_v", "to_out.0"] + # target_modules = ["to_k", "to_q", "to_v", "to_out.0"] # just train transformer_blocks + + # train transformer_blocks and single_transformer_blocks + target_modules = ["to_k", "to_q", "to_v", "to_out.0"] + [ + "to_qkv_mlp_proj", + *[f"single_transformer_blocks.{i}.attn.to_out" for i in range(24)], + ] # now we will add new LoRA weights the transformer layers transformer_lora_config = LoraConfig( diff --git a/examples/dreambooth/train_dreambooth_lora_flux2_klein_img2img.py b/examples/dreambooth/train_dreambooth_lora_flux2_klein_img2img.py index 28cbaf8f72e7..f8ca327cb63a 100644 --- a/examples/dreambooth/train_dreambooth_lora_flux2_klein_img2img.py +++ b/examples/dreambooth/train_dreambooth_lora_flux2_klein_img2img.py @@ -1200,7 +1200,13 @@ def main(args): if args.lora_layers is not None: target_modules = [layer.strip() for layer in args.lora_layers.split(",")] else: - target_modules = ["to_k", "to_q", "to_v", "to_out.0"] + # target_modules = ["to_k", "to_q", "to_v", "to_out.0"] # just train transformer_blocks + + # train transformer_blocks and single_transformer_blocks + target_modules = ["to_k", "to_q", "to_v", "to_out.0"] + [ + "to_qkv_mlp_proj", + *[f"single_transformer_blocks.{i}.attn.to_out" for i in range(24)], + ] # now we will add new LoRA weights the transformer layers transformer_lora_config = LoraConfig(