From 6801f6931b447a4cb8cf6062f659527843ff4f7c Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Fri, 12 Jun 2026 15:15:03 +0800 Subject: [PATCH] release Image-to-LoRA V2 --- README.md | 8 ++++ README_zh.md | 8 ++++ docs/en/Model_Details/FLUX2.md | 1 + docs/en/Model_Details/HiDream-O1-Image.md | 1 + docs/en/Model_Details/Z-Image.md | 1 + docs/zh/Model_Details/FLUX2.md | 1 + docs/zh/Model_Details/HiDream-O1-Image.md | 1 + docs/zh/Model_Details/Z-Image.md | 1 + .../model_inference/KleinBase4B-i2L-v2.py | 33 ++++++++++++++ .../KleinBase4B-i2L-v2.py | 45 +++++++++++++++++++ .../model_training/full/KleinBase4B-i2L-v2.sh | 19 ++++++++ .../validate_full/KleinBase4B-i2L-v2.py | 35 +++++++++++++++ .../model_inference/HidreamO1-i2L-v2.py | 29 ++++++++++++ .../HidreamO1-i2L-v2.py | 41 +++++++++++++++++ .../model_training/full/HidreamO1-i2L-v2.sh | 19 ++++++++ .../validate_full/HidreamO1-i2L-v2.py | 31 +++++++++++++ .../z_image/model_inference/ZImage-i2L-v2.py | 33 ++++++++++++++ .../model_inference_low_vram/ZImage-i2L-v2.py | 45 +++++++++++++++++++ .../model_training/full/ZImage-i2L-v2.sh | 19 ++++++++ .../validate_full/ZImage-i2L-v2.py | 35 +++++++++++++++ 20 files changed, 406 insertions(+) create mode 100644 examples/flux2/model_inference/KleinBase4B-i2L-v2.py create mode 100644 examples/flux2/model_inference_low_vram/KleinBase4B-i2L-v2.py create mode 100644 examples/flux2/model_training/full/KleinBase4B-i2L-v2.sh create mode 100644 examples/flux2/model_training/validate_full/KleinBase4B-i2L-v2.py create mode 100644 examples/hidream_o1_image/model_inference/HidreamO1-i2L-v2.py create mode 100644 examples/hidream_o1_image/model_inference_low_vram/HidreamO1-i2L-v2.py create mode 100644 examples/hidream_o1_image/model_training/full/HidreamO1-i2L-v2.sh create mode 100644 examples/hidream_o1_image/model_training/validate_full/HidreamO1-i2L-v2.py create mode 100644 examples/z_image/model_inference/ZImage-i2L-v2.py create mode 100644 
examples/z_image/model_inference_low_vram/ZImage-i2L-v2.py create mode 100644 examples/z_image/model_training/full/ZImage-i2L-v2.sh create mode 100644 examples/z_image/model_training/validate_full/ZImage-i2L-v2.py diff --git a/README.md b/README.md index 225d7e4c9..b2dda10d1 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,11 @@ We believe that a well-developed open-source code framework can lower the thresh > Currently, the development personnel of this project are limited, with most of the work handled by [Artiprocher](https://github.com/Artiprocher) and [mi804](https://github.com/mi804). Therefore, the progress of new feature development will be relatively slow, and the speed of responding to and resolving issues is limited. We apologize for this and ask developers to understand. +- **June 15, 2026** We have open-sourced Image-to-LoRA V2, compressing the hours-long training process for image style LoRAs into a single model inference step, thereby exploring a new paradigm for LoRA model training. This release includes three models: + * [DiffSynth-Studio/ZImage-i2L-v2](https://modelscope.cn/models/DiffSynth-Studio/ZImage-i2L-v2): Adapted for the Z-Image model + * [DiffSynth-Studio/KleinBase4B-i2L-v2](https://modelscope.cn/models/DiffSynth-Studio/KleinBase4B-i2L-v2): Adapted for the FLUX.2-klein-base-4B model + * [DiffSynth-Studio/HidreamO1-i2L-v2](https://modelscope.cn/models/DiffSynth-Studio/HidreamO1-i2L-v2): Adapted for the HiDream-O1-Image model + - **June 5, 2026** Ideogram 4 open-sourced. Support includes text-to-image inference. For details, please refer to the [documentation](/docs/en/Model_Details/Ideogram-4.md) and [example code](/examples/ideogram4/). - **May 21, 2026**: Added support for image quality metrics models, including FID, CLIP, Aesthetic, PickScore, ImageReward, HPSv2, and HPSv3. For details, refer to the [documentation](/docs/en/Model_Details/Image-Quality-Metrics.md) and [example code](/examples/image_quality_metric/). 
@@ -317,6 +322,7 @@ Example code for Z-Image is available at: [/examples/z_image/](/examples/z_image |[PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh)|[code](/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh)|[code](/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)| |[PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh)|[code](/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh)|[code](/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)| 
|[PAI/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh)|[code](/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh)|[code](/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)| +|[DiffSynth-Studio/ZImage-i2L-v2](https://www.modelscope.cn/models/DiffSynth-Studio/ZImage-i2L-v2)|[code](/examples/z_image/model_inference/ZImage-i2L-v2.py)|[code](/examples/z_image/model_inference_low_vram/ZImage-i2L-v2.py)|[code](/examples/z_image/model_training/full/ZImage-i2L-v2.sh)|[code](/examples/z_image/model_training/validate_full/ZImage-i2L-v2.py)|-|-| @@ -507,6 +513,7 @@ Example code for FLUX.2 is available at: [/examples/flux2/](/examples/flux2/) |[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| +|[DiffSynth-Studio/KleinBase4B-i2L-v2](https://www.modelscope.cn/models/DiffSynth-Studio/KleinBase4B-i2L-v2)|[code](/examples/flux2/model_inference/KleinBase4B-i2L-v2.py)|[code](/examples/flux2/model_inference_low_vram/KleinBase4B-i2L-v2.py)|[code](/examples/flux2/model_training/full/KleinBase4B-i2L-v2.sh)|[code](/examples/flux2/model_training/validate_full/KleinBase4B-i2L-v2.py)|-|-| @@ -951,6 +958,7 @@ Example code for HiDream-O1-Image is available at: [/examples/hidream_o1_image/] |-|-|-|-|-|-|-| |[HiDream-ai/HiDream-O1-Image](https://modelscope.cn/HiDream-ai/HiDream-O1-Image)|[code](/examples/hidream_o1_image/model_inference/HiDream-O1-Image.py)|[code](/examples/hidream_o1_image/model_inference_low_vram/HiDream-O1-Image.py)|[code](/examples/hidream_o1_image/model_training/full/HiDream-O1-Image.sh)|[code](/examples/hidream_o1_image/model_training/validate_full/HiDream-O1-Image.py)|[code](/examples/hidream_o1_image/model_training/lora/HiDream-O1-Image.sh)|[code](/examples/hidream_o1_image/model_training/validate_lora/HiDream-O1-Image.py)| 
|[HiDream-ai/HiDream-O1-Image-Dev](https://modelscope.cn/HiDream-ai/HiDream-O1-Image-Dev)|[code](/examples/hidream_o1_image/model_inference/HiDream-O1-Image-Dev.py)|[code](/examples/hidream_o1_image/model_inference_low_vram/HiDream-O1-Image-Dev.py)|[code](/examples/hidream_o1_image/model_training/full/HiDream-O1-Image-Dev.sh)|[code](/examples/hidream_o1_image/model_training/validate_full/HiDream-O1-Image-Dev.py)|[code](/examples/hidream_o1_image/model_training/lora/HiDream-O1-Image-Dev.sh)|[code](/examples/hidream_o1_image/model_training/validate_lora/HiDream-O1-Image-Dev.py)| +|[DiffSynth-Studio/HidreamO1-i2L-v2](https://www.modelscope.cn/models/DiffSynth-Studio/HidreamO1-i2L-v2)|[code](/examples/hidream_o1_image/model_inference/HidreamO1-i2L-v2.py)|[code](/examples/hidream_o1_image/model_inference_low_vram/HidreamO1-i2L-v2.py)|[code](/examples/hidream_o1_image/model_training/full/HidreamO1-i2L-v2.sh)|[code](/examples/hidream_o1_image/model_training/validate_full/HidreamO1-i2L-v2.py)|-|-| diff --git a/README_zh.md b/README_zh.md index 37cdf0095..9791c666f 100644 --- a/README_zh.md +++ b/README_zh.md @@ -34,6 +34,11 @@ DiffSynth 目前包括两个开源项目: > 目前本项目的开发人员有限,大部分工作由 [Artiprocher](https://github.com/Artiprocher) 和 [mi804](https://github.com/mi804) 负责,因此新功能的开发进展会比较缓慢,issue 的回复和解决速度有限,我们对此感到非常抱歉,请各位开发者理解。 +- **2026年6月15日** 我们开源了 Image-to-LoRA V2,将动辄数小时的图像风格 LoRA 训练压缩到一次模型推理中,探索 LoRA 模型训练的新方式。本次开源包括三个模型: + * [DiffSynth-Studio/ZImage-i2L-v2](https://modelscope.cn/models/DiffSynth-Studio/ZImage-i2L-v2):适配模型 Z-Image + * [DiffSynth-Studio/KleinBase4B-i2L-v2](https://modelscope.cn/models/DiffSynth-Studio/KleinBase4B-i2L-v2):适配模型 FLUX.2-klein-base-4B + * [DiffSynth-Studio/HidreamO1-i2L-v2](https://modelscope.cn/models/DiffSynth-Studio/HidreamO1-i2L-v2):适配模型 HiDream-O1-Image + - **2026年6月5日** Ideogram 4 开源,已支持文生图推理。详情请参考[文档](/docs/zh/Model_Details/Ideogram-4.md)和[示例代码](/examples/ideogram4/)。 - **2026年5月21日** 新增图像质量评估模型的支持,包括 
FID、CLIP、Aesthetic、PickScore、ImageReward、HPSv2、HPSv3,详情请参考[文档](/docs/zh/Model_Details/Image-Quality-Metrics.md)和[示例代码](/examples/image_quality_metric/) @@ -317,6 +322,7 @@ Z-Image 的示例代码位于:[/examples/z_image/](/examples/z_image/) |[PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh)|[code](/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh)|[code](/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)| |[PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh)|[code](/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh)|[code](/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)| 
|[PAI/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh)|[code](/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh)|[code](/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)| +|[DiffSynth-Studio/ZImage-i2L-v2](https://www.modelscope.cn/models/DiffSynth-Studio/ZImage-i2L-v2)|[code](/examples/z_image/model_inference/ZImage-i2L-v2.py)|[code](/examples/z_image/model_inference_low_vram/ZImage-i2L-v2.py)|[code](/examples/z_image/model_training/full/ZImage-i2L-v2.sh)|[code](/examples/z_image/model_training/validate_full/ZImage-i2L-v2.py)|-|-| @@ -507,6 +513,7 @@ FLUX.2 的示例代码位于:[/examples/flux2/](/examples/flux2/) |[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| +|[DiffSynth-Studio/KleinBase4B-i2L-v2](https://www.modelscope.cn/models/DiffSynth-Studio/KleinBase4B-i2L-v2)|[code](/examples/flux2/model_inference/KleinBase4B-i2L-v2.py)|[code](/examples/flux2/model_inference_low_vram/KleinBase4B-i2L-v2.py)|[code](/examples/flux2/model_training/full/KleinBase4B-i2L-v2.sh)|[code](/examples/flux2/model_training/validate_full/KleinBase4B-i2L-v2.py)|-|-| @@ -951,6 +958,7 @@ HiDream-O1-Image 的示例代码位于:[/examples/hidream_o1_image/](/examples |-|-|-|-|-|-|-| |[HiDream-ai/HiDream-O1-Image](https://modelscope.cn/models/HiDream-ai/HiDream-O1-Image)|[code](/examples/hidream_o1_image/model_inference/HiDream-O1-Image.py)|[code](/examples/hidream_o1_image/model_inference_low_vram/HiDream-O1-Image.py)|[code](/examples/hidream_o1_image/model_training/full/HiDream-O1-Image.sh)|[code](/examples/hidream_o1_image/model_training/validate_full/HiDream-O1-Image.py)|[code](/examples/hidream_o1_image/model_training/lora/HiDream-O1-Image.sh)|[code](/examples/hidream_o1_image/model_training/validate_lora/HiDream-O1-Image.py)| 
|[HiDream-ai/HiDream-O1-Image-Dev](https://modelscope.cn/models/HiDream-ai/HiDream-O1-Image-Dev)|[code](/examples/hidream_o1_image/model_inference/HiDream-O1-Image-Dev.py)|[code](/examples/hidream_o1_image/model_inference_low_vram/HiDream-O1-Image-Dev.py)|[code](/examples/hidream_o1_image/model_training/full/HiDream-O1-Image-Dev.sh)|[code](/examples/hidream_o1_image/model_training/validate_full/HiDream-O1-Image-Dev.py)|[code](/examples/hidream_o1_image/model_training/lora/HiDream-O1-Image-Dev.sh)|[code](/examples/hidream_o1_image/model_training/validate_lora/HiDream-O1-Image-Dev.py)| +|[DiffSynth-Studio/HidreamO1-i2L-v2](https://www.modelscope.cn/models/DiffSynth-Studio/HidreamO1-i2L-v2)|[code](/examples/hidream_o1_image/model_inference/HidreamO1-i2L-v2.py)|[code](/examples/hidream_o1_image/model_inference_low_vram/HidreamO1-i2L-v2.py)|[code](/examples/hidream_o1_image/model_training/full/HidreamO1-i2L-v2.sh)|[code](/examples/hidream_o1_image/model_training/validate_full/HidreamO1-i2L-v2.py)|-|-| diff --git a/docs/en/Model_Details/FLUX2.md b/docs/en/Model_Details/FLUX2.md index c2fa4b4e0..e5dd73f62 100644 --- a/docs/en/Model_Details/FLUX2.md +++ b/docs/en/Model_Details/FLUX2.md @@ -77,6 +77,7 @@ image.save("image.jpg") |[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| +|[DiffSynth-Studio/KleinBase4B-i2L-v2](https://www.modelscope.cn/models/DiffSynth-Studio/KleinBase4B-i2L-v2)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/KleinBase4B-i2L-v2.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/KleinBase4B-i2L-v2.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/KleinBase4B-i2L-v2.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/KleinBase4B-i2L-v2.py)|-|-| Special Training 
Scripts: diff --git a/docs/en/Model_Details/HiDream-O1-Image.md b/docs/en/Model_Details/HiDream-O1-Image.md index 2186d28ef..1dc19695e 100644 --- a/docs/en/Model_Details/HiDream-O1-Image.md +++ b/docs/en/Model_Details/HiDream-O1-Image.md @@ -63,6 +63,7 @@ image.save("image.jpg") |-|-|-|-|-|-|-| |[HiDream-ai/HiDream-O1-Image](https://modelscope.cn/HiDream-ai/HiDream-O1-Image)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_inference/HiDream-O1-Image.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_inference_low_vram/HiDream-O1-Image.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/full/HiDream-O1-Image.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/validate_full/HiDream-O1-Image.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/lora/HiDream-O1-Image.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/validate_lora/HiDream-O1-Image.py)| 
|[HiDream-ai/HiDream-O1-Image-Dev](https://modelscope.cn/HiDream-ai/HiDream-O1-Image-Dev)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_inference/HiDream-O1-Image-Dev.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_inference_low_vram/HiDream-O1-Image-Dev.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/full/HiDream-O1-Image-Dev.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/validate_full/HiDream-O1-Image-Dev.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/lora/HiDream-O1-Image-Dev.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/validate_lora/HiDream-O1-Image-Dev.py)| +|[DiffSynth-Studio/HidreamO1-i2L-v2](https://www.modelscope.cn/models/DiffSynth-Studio/HidreamO1-i2L-v2)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_inference/HidreamO1-i2L-v2.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_inference_low_vram/HidreamO1-i2L-v2.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/full/HidreamO1-i2L-v2.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/validate_full/HidreamO1-i2L-v2.py)|-|-| ## Model Inference diff --git a/docs/en/Model_Details/Z-Image.md b/docs/en/Model_Details/Z-Image.md index 92d2f25eb..2bfbdadb3 100644 --- a/docs/en/Model_Details/Z-Image.md +++ b/docs/en/Model_Details/Z-Image.md @@ -58,6 +58,7 @@ image.save("image.jpg") 
|[PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)| 
|[PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)| 
|[PAI/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)| +|[DiffSynth-Studio/ZImage-i2L-v2](https://www.modelscope.cn/models/DiffSynth-Studio/ZImage-i2L-v2)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference/ZImage-i2L-v2.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference_low_vram/ZImage-i2L-v2.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/full/ZImage-i2L-v2.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/validate_full/ZImage-i2L-v2.py)|-|-| Special Training Scripts: diff --git a/docs/zh/Model_Details/FLUX2.md b/docs/zh/Model_Details/FLUX2.md index f5535ac5f..80136346d 100644 --- a/docs/zh/Model_Details/FLUX2.md +++ b/docs/zh/Model_Details/FLUX2.md @@ -77,6 +77,7 @@ image.save("image.jpg") 
|[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| +|[DiffSynth-Studio/KleinBase4B-i2L-v2](https://www.modelscope.cn/models/DiffSynth-Studio/KleinBase4B-i2L-v2)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/KleinBase4B-i2L-v2.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/KleinBase4B-i2L-v2.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/KleinBase4B-i2L-v2.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/KleinBase4B-i2L-v2.py)|-|-| 特殊训练脚本: diff --git a/docs/zh/Model_Details/HiDream-O1-Image.md b/docs/zh/Model_Details/HiDream-O1-Image.md index 20d1eb05b..378bb5661 100644 --- a/docs/zh/Model_Details/HiDream-O1-Image.md +++ b/docs/zh/Model_Details/HiDream-O1-Image.md @@ -63,6 +63,7 @@ image.save("image.jpg") |-|-|-|-|-|-|-| 
|[HiDream-ai/HiDream-O1-Image](https://www.modelscope.cn/models/HiDream-ai/HiDream-O1-Image)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_inference/HiDream-O1-Image.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_inference_low_vram/HiDream-O1-Image.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/full/HiDream-O1-Image.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/validate_full/HiDream-O1-Image.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/lora/HiDream-O1-Image.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/validate_lora/HiDream-O1-Image.py)| |[HiDream-ai/HiDream-O1-Image-Dev](https://www.modelscope.cn/models/HiDream-ai/HiDream-O1-Image-Dev)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_inference/HiDream-O1-Image-Dev.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_inference_low_vram/HiDream-O1-Image-Dev.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/full/HiDream-O1-Image-Dev.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/validate_full/HiDream-O1-Image-Dev.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/lora/HiDream-O1-Image-Dev.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/validate_lora/HiDream-O1-Image-Dev.py)| 
+|[DiffSynth-Studio/HidreamO1-i2L-v2](https://www.modelscope.cn/models/DiffSynth-Studio/HidreamO1-i2L-v2)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_inference/HidreamO1-i2L-v2.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_inference_low_vram/HidreamO1-i2L-v2.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/full/HidreamO1-i2L-v2.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/hidream_o1_image/model_training/validate_full/HidreamO1-i2L-v2.py)|-|-| ## 模型推理 diff --git a/docs/zh/Model_Details/Z-Image.md b/docs/zh/Model_Details/Z-Image.md index 7a3a249e3..cbcbe48b3 100644 --- a/docs/zh/Model_Details/Z-Image.md +++ b/docs/zh/Model_Details/Z-Image.md @@ -58,6 +58,7 @@ image.save("image.jpg") |[PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1.py)| 
|[PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Union-2.1-8steps.py)| 
|[PAI/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps](https://www.modelscope.cn/models/PAI/Z-Image-Turbo-Fun-Controlnet-Union-2.1)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference_low_vram/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/validate_full/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/validate_lora/Z-Image-Turbo-Fun-Controlnet-Tile-2.1-8steps.py)| +|[DiffSynth-Studio/ZImage-i2L-v2](https://www.modelscope.cn/models/DiffSynth-Studio/ZImage-i2L-v2)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference/ZImage-i2L-v2.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_inference_low_vram/ZImage-i2L-v2.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/full/ZImage-i2L-v2.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/z_image/model_training/validate_full/ZImage-i2L-v2.py)|-|-| 特殊训练脚本: diff --git a/examples/flux2/model_inference/KleinBase4B-i2L-v2.py b/examples/flux2/model_inference/KleinBase4B-i2L-v2.py new file mode 100644 index 000000000..a8649b39a --- /dev/null +++ b/examples/flux2/model_inference/KleinBase4B-i2L-v2.py @@ -0,0 +1,33 @@ +from diffsynth.diffusion.template import TemplatePipeline +from 
diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from modelscope import snapshot_download +from PIL import Image +import numpy as np +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +pipe.enable_lora_hot_loading(pipe.dit) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/KleinBase4B-i2L-v2")], +) +snapshot_download("DiffSynth-Studio/KleinBase4B-i2L-v2", allow_file_pattern="assets/*", local_dir="data") +images = [Image.open(f"data/assets/image_1_{i}.jpg") for i in range(4)] +image = template( + pipe, + prompt="A cat is sitting on a stone", + seed=42, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"image": images}], + negative_template_inputs = [{"image": [Image.fromarray(np.zeros_like(np.array(i)) + 128) for i in images]}], +) +image.save("image_output.jpg") \ No newline at end of file diff --git a/examples/flux2/model_inference_low_vram/KleinBase4B-i2L-v2.py b/examples/flux2/model_inference_low_vram/KleinBase4B-i2L-v2.py new file mode 100644 index 000000000..b89b4c994 --- /dev/null +++ b/examples/flux2/model_inference_low_vram/KleinBase4B-i2L-v2.py @@ -0,0 +1,45 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from modelscope import snapshot_download +from PIL import Image +import numpy as np +import torch + 
+vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +pipe.enable_lora_hot_loading(pipe.dit) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + lazy_loading=True, + model_configs=[ModelConfig(model_id="DiffSynth-Studio/KleinBase4B-i2L-v2")], +) +snapshot_download("DiffSynth-Studio/KleinBase4B-i2L-v2", allow_file_pattern="assets/*", local_dir="data") +images = [Image.open(f"data/assets/image_1_{i}.jpg") for i in range(4)] +image = template( + pipe, + prompt="A cat is sitting on a stone", + seed=42, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"image": images}], + negative_template_inputs = [{"image": [Image.fromarray(np.zeros_like(np.array(i)) + 128) for i in images]}], +) +image.save("image_output.jpg") \ No newline at end of file diff --git a/examples/flux2/model_training/full/KleinBase4B-i2L-v2.sh b/examples/flux2/model_training/full/KleinBase4B-i2L-v2.sh new file mode 100644 index 000000000..ebc7be39b --- /dev/null +++ b/examples/flux2/model_training/full/KleinBase4B-i2L-v2.sh @@ -0,0 +1,19 @@ +modelscope download 
--dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/KleinBase4B-i2L-v2/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/KleinBase4B-i2L-v2 \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/KleinBase4B-i2L-v2/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 400 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/KleinBase4B-i2L-v2:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-5 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --output_path "./models/train/KleinBase4B-i2L-v2_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --enable_lora_hot_loading diff --git a/examples/flux2/model_training/validate_full/KleinBase4B-i2L-v2.py b/examples/flux2/model_training/validate_full/KleinBase4B-i2L-v2.py new file mode 100644 index 000000000..84726380b --- /dev/null +++ b/examples/flux2/model_training/validate_full/KleinBase4B-i2L-v2.py @@ -0,0 +1,35 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from modelscope import snapshot_download +from PIL import Image +import numpy as np +import torch +from diffsynth import load_state_dict + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", 
origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +pipe.enable_lora_hot_loading(pipe.dit) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/KleinBase4B-i2L-v2")], +) +template.models[0].load_state_dict(load_state_dict("models/train/KleinBase4B-i2L-v2_full/epoch-1.safetensors")) +snapshot_download("DiffSynth-Studio/KleinBase4B-i2L-v2", allow_file_pattern="assets/*", local_dir="data") +images = [Image.open(f"data/assets/image_1_{i}.jpg") for i in range(4)] +image = template( + pipe, + prompt="A cat is sitting on a stone", + seed=42, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"image": images}], + negative_template_inputs = [{"image": [Image.fromarray(np.zeros_like(np.array(i)) + 128) for i in images]}], +) +image.save("image_output.jpg") \ No newline at end of file diff --git a/examples/hidream_o1_image/model_inference/HidreamO1-i2L-v2.py b/examples/hidream_o1_image/model_inference/HidreamO1-i2L-v2.py new file mode 100644 index 000000000..84c89bb1a --- /dev/null +++ b/examples/hidream_o1_image/model_inference/HidreamO1-i2L-v2.py @@ -0,0 +1,29 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.hidream_o1_image import HiDreamO1ImagePipeline, ModelConfig +from modelscope import snapshot_download +from PIL import Image +import numpy as np +import torch + +pipe = HiDreamO1ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="HiDream-ai/HiDream-O1-Image", origin_file_pattern="model-*.safetensors")], + processor_config=ModelConfig(model_id="HiDream-ai/HiDream-O1-Image", origin_file_pattern="./"), +) 
+pipe.enable_lora_hot_loading(pipe.dit) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/HidreamO1-i2L-v2")], +) +snapshot_download("DiffSynth-Studio/HidreamO1-i2L-v2", allow_file_pattern="assets/*", local_dir="data") +images = [Image.open(f"data/assets/multi_input_{i}.jpg") for i in range(4)] +image = template( + pipe, + prompt="A cat is sitting on a stone", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"image": images}], + negative_template_inputs = [{"image": [Image.fromarray(np.zeros_like(np.array(i)) + 128) for i in images]}], +) +image.save("image_output.jpg") \ No newline at end of file diff --git a/examples/hidream_o1_image/model_inference_low_vram/HidreamO1-i2L-v2.py b/examples/hidream_o1_image/model_inference_low_vram/HidreamO1-i2L-v2.py new file mode 100644 index 000000000..794ad4633 --- /dev/null +++ b/examples/hidream_o1_image/model_inference_low_vram/HidreamO1-i2L-v2.py @@ -0,0 +1,41 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.hidream_o1_image import HiDreamO1ImagePipeline, ModelConfig +from modelscope import snapshot_download +from PIL import Image +import numpy as np +import torch + +vram_config = { + "offload_dtype": torch.bfloat16, + "offload_device": "cpu", + "onload_dtype": torch.bfloat16, + "onload_device": "cpu", + "preparing_dtype": torch.bfloat16, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = HiDreamO1ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="HiDream-ai/HiDream-O1-Image", origin_file_pattern="model-*.safetensors", **vram_config),], + processor_config=ModelConfig(model_id="HiDream-ai/HiDream-O1-Image", origin_file_pattern="./"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +pipe.enable_lora_hot_loading(pipe.dit) 
+template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + lazy_loading=True, + model_configs=[ModelConfig(model_id="DiffSynth-Studio/HidreamO1-i2L-v2")], +) +snapshot_download("DiffSynth-Studio/HidreamO1-i2L-v2", allow_file_pattern="assets/*", local_dir="data") +images = [Image.open(f"data/assets/multi_input_{i}.jpg") for i in range(4)] +image = template( + pipe, + prompt="A cat is sitting on a stone", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"image": images}], + negative_template_inputs = [{"image": [Image.fromarray(np.zeros_like(np.array(i)) + 128) for i in images]}], +) +image.save("image_output.jpg") \ No newline at end of file diff --git a/examples/hidream_o1_image/model_training/full/HidreamO1-i2L-v2.sh b/examples/hidream_o1_image/model_training/full/HidreamO1-i2L-v2.sh new file mode 100644 index 000000000..eab073652 --- /dev/null +++ b/examples/hidream_o1_image/model_training/full/HidreamO1-i2L-v2.sh @@ -0,0 +1,19 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "hidream_o1_image/HidreamO1-i2L-v2/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/hidream_o1_image/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/hidream_o1_image/HidreamO1-i2L-v2 \ + --dataset_metadata_path data/diffsynth_example_dataset/hidream_o1_image/HidreamO1-i2L-v2/metadata.jsonl \ + --max_pixels 4194304 \ + --dataset_repeat 400 \ + --model_id_with_origin_paths "HiDream-ai/HiDream-O1-Image:model-*.safetensors" \ + --processor_config "HiDream-ai/HiDream-O1-Image:./" \ + --learning_rate 1e-5 \ + --num_epochs 2 \ + --output_path "./models/train/HidreamO1-i2L-v2_full" \ + --use_gradient_checkpointing \ + --noise_scale 8.0 \ + --enable_lora_hot_loading \ + --template_model_id_or_path "DiffSynth-Studio/HidreamO1-i2L-v2:" \ + --extra_inputs "template_inputs" \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --trainable_models "template_model" # Use `template_model.emb2lora` to freeze the image encoder. diff --git a/examples/hidream_o1_image/model_training/validate_full/HidreamO1-i2L-v2.py b/examples/hidream_o1_image/model_training/validate_full/HidreamO1-i2L-v2.py new file mode 100644 index 000000000..a9c99d861 --- /dev/null +++ b/examples/hidream_o1_image/model_training/validate_full/HidreamO1-i2L-v2.py @@ -0,0 +1,31 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.hidream_o1_image import HiDreamO1ImagePipeline, ModelConfig +from modelscope import snapshot_download +from PIL import Image +import numpy as np +import torch +from diffsynth import load_state_dict + +pipe = HiDreamO1ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="HiDream-ai/HiDream-O1-Image", origin_file_pattern="model-*.safetensors")], + processor_config=ModelConfig(model_id="HiDream-ai/HiDream-O1-Image", origin_file_pattern="./"), +) +pipe.enable_lora_hot_loading(pipe.dit) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/HidreamO1-i2L-v2")], +) +template.models[0].load_state_dict(load_state_dict("models/train/HidreamO1-i2L-v2_full/epoch-1.safetensors")) +snapshot_download("DiffSynth-Studio/HidreamO1-i2L-v2", allow_file_pattern="assets/*", local_dir="data") +images = [Image.open(f"data/assets/multi_input_{i}.jpg") for i in range(4)] +image = template( + pipe, + prompt="A cat is sitting on a stone", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"image": images}], + negative_template_inputs = [{"image": [Image.fromarray(np.zeros_like(np.array(i)) + 128) for i in images]}], +) +image.save("image_output.jpg") \ No newline at end of file diff --git a/examples/z_image/model_inference/ZImage-i2L-v2.py b/examples/z_image/model_inference/ZImage-i2L-v2.py new file mode 100644 index
000000000..ee23ba656 --- /dev/null +++ b/examples/z_image/model_inference/ZImage-i2L-v2.py @@ -0,0 +1,33 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.z_image import ZImagePipeline, ModelConfig +from modelscope import snapshot_download +from PIL import Image +import numpy as np +import torch + +pipe = ZImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="tokenizer/"), +) +pipe.enable_lora_hot_loading(pipe.dit) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/ZImage-i2L-v2")], +) +snapshot_download("DiffSynth-Studio/ZImage-i2L-v2", allow_file_pattern="assets/*", local_dir="data") +images = [Image.open(f"data/assets/multi_input_{i}.jpg") for i in range(4)] +image = template( + pipe, + prompt="A cat is sitting on a stone", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"image": images}], + negative_template_inputs = [{"image": [Image.fromarray(np.zeros_like(np.array(i)) + 128) for i in images]}], +) +image.save("image_output.jpg") \ No newline at end of file diff --git a/examples/z_image/model_inference_low_vram/ZImage-i2L-v2.py b/examples/z_image/model_inference_low_vram/ZImage-i2L-v2.py new file mode 100644 index 000000000..8ed16807d --- /dev/null +++ b/examples/z_image/model_inference_low_vram/ZImage-i2L-v2.py @@ -0,0 +1,45 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.z_image import ZImagePipeline, ModelConfig +from 
modelscope import snapshot_download +from PIL import Image +import numpy as np +import torch + +vram_config = { + "offload_dtype": torch.bfloat16, + "offload_device": "cpu", + "onload_dtype": torch.bfloat16, + "onload_device": "cpu", + "preparing_dtype": torch.bfloat16, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = ZImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="vae/diffusion_pytorch_model.safetensors", **vram_config), + ], + tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +pipe.enable_lora_hot_loading(pipe.dit) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + lazy_loading=True, + model_configs=[ModelConfig(model_id="DiffSynth-Studio/ZImage-i2L-v2")], +) +snapshot_download("DiffSynth-Studio/ZImage-i2L-v2", allow_file_pattern="assets/*", local_dir="data") +images = [Image.open(f"data/assets/multi_input_{i}.jpg") for i in range(4)] +image = template( + pipe, + prompt="A cat is sitting on a stone", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"image": images}], + negative_template_inputs = [{"image": [Image.fromarray(np.zeros_like(np.array(i)) + 128) for i in images]}], +) +image.save("image_output.jpg") \ No newline at end of file diff --git a/examples/z_image/model_training/full/ZImage-i2L-v2.sh b/examples/z_image/model_training/full/ZImage-i2L-v2.sh new file mode 100644 index 000000000..c754bf34e --- /dev/null +++ 
b/examples/z_image/model_training/full/ZImage-i2L-v2.sh @@ -0,0 +1,19 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "z_image/ZImage-i2L-v2/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/z_image/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/z_image/ZImage-i2L-v2 \ + --dataset_metadata_path data/diffsynth_example_dataset/z_image/ZImage-i2L-v2/metadata.jsonl \ + --max_pixels 1048576 \ + --dataset_repeat 400 \ + --model_id_with_origin_paths "Tongyi-MAI/Z-Image:transformer/*.safetensors,Tongyi-MAI/Z-Image-Turbo:text_encoder/*.safetensors,Tongyi-MAI/Z-Image-Turbo:vae/diffusion_pytorch_model.safetensors" \ + --learning_rate 1e-5 \ + --num_epochs 2 \ + --output_path "./models/train/ZImage-i2L-v2_full" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --dataset_num_workers 8 \ + --enable_lora_hot_loading \ + --template_model_id_or_path "DiffSynth-Studio/ZImage-i2L-v2:" \ + --extra_inputs "template_inputs" \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --trainable_models "template_model" # Use `template_model.emb2lora` to freeze the image encoder. 
diff --git a/examples/z_image/model_training/validate_full/ZImage-i2L-v2.py b/examples/z_image/model_training/validate_full/ZImage-i2L-v2.py new file mode 100644 index 000000000..7d2ded901 --- /dev/null +++ b/examples/z_image/model_training/validate_full/ZImage-i2L-v2.py @@ -0,0 +1,35 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.z_image import ZImagePipeline, ModelConfig +from modelscope import snapshot_download +from PIL import Image +import numpy as np +import torch +from diffsynth import load_state_dict + +pipe = ZImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="tokenizer/"), +) +pipe.enable_lora_hot_loading(pipe.dit) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/ZImage-i2L-v2")], +) +template.models[0].load_state_dict(load_state_dict("models/train/ZImage-i2L-v2_full/epoch-1.safetensors")) +snapshot_download("DiffSynth-Studio/ZImage-i2L-v2", allow_file_pattern="assets/*", local_dir="data") +images = [Image.open(f"data/assets/multi_input_{i}.jpg") for i in range(4)] +image = template( + pipe, + prompt="A cat is sitting on a stone", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"image": images}], + negative_template_inputs = [{"image": [Image.fromarray(np.zeros_like(np.array(i)) + 128) for i in images]}], +) +image.save("image_output.jpg") \ No newline at end of file