From f88b99cb4fa8fcf875760ad778546939c77d245d Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Tue, 17 Mar 2026 13:34:25 +0800 Subject: [PATCH 01/12] diffusion skills framework --- .gitignore | 1 + diffsynth/diffusion/base_pipeline.py | 9 + diffsynth/diffusion/parsers.py | 5 + diffsynth/diffusion/skills.py | 137 +++++++++++ diffsynth/diffusion/training_module.py | 45 ++++ diffsynth/models/flux2_dit.py | 219 +++++++----------- diffsynth/pipelines/flux2_image.py | 7 + .../FLUX.2-klein-base-4B-skills.py | 56 +++++ .../full/FLUX.2-klein-base-4B-skills.sh | 16 ++ .../convert_base_model_to_skill_model.py | 60 +++++ examples/flux2/model_training/train.py | 3 + 11 files changed, 421 insertions(+), 137 deletions(-) create mode 100644 diffsynth/diffusion/skills.py create mode 100644 examples/flux2/model_inference/FLUX.2-klein-base-4B-skills.py create mode 100644 examples/flux2/model_training/full/FLUX.2-klein-base-4B-skills.sh create mode 100644 examples/flux2/model_training/scripts/convert_base_model_to_skill_model.py diff --git a/.gitignore b/.gitignore index 6fd0d8e14..a511cf23f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ /models /scripts /diffusers +/.vscode *.pkl *.safetensors *.pth diff --git a/diffsynth/diffusion/base_pipeline.py b/diffsynth/diffusion/base_pipeline.py index face31911..52f1f02b4 100644 --- a/diffsynth/diffusion/base_pipeline.py +++ b/diffsynth/diffusion/base_pipeline.py @@ -9,6 +9,7 @@ from ..models.model_loader import ModelPool from ..utils.controlnet import ControlNetInput from ..core.device import get_device_name, IS_NPU_AVAILABLE +from .skills import load_skill_model, load_skill_data_processor class PipelineUnit: @@ -338,6 +339,14 @@ def cfg_guided_model_fn(self, model_fn, cfg_scale, inputs_shared, inputs_posi, i else: noise_pred = noise_pred_posi return noise_pred + + + def load_training_skill_model(self, model_config: ModelConfig = None): + if model_config is not None: + model_config.download_if_necessary() + self.skill_model = 
load_skill_model(model_config.path, torch_dtype=self.torch_dtype, device=self.device) + self.skill_data_processor = load_skill_data_processor(model_config.path)() + class PipelineUnitGraph: diff --git a/diffsynth/diffusion/parsers.py b/diffsynth/diffusion/parsers.py index b8c6c6afd..9dc90e82c 100644 --- a/diffsynth/diffusion/parsers.py +++ b/diffsynth/diffusion/parsers.py @@ -60,6 +60,10 @@ def add_gradient_config(parser: argparse.ArgumentParser): parser.add_argument("--gradient_accumulation_steps", type=int, default=1, help="Gradient accumulation steps.") return parser +def add_skill_model_config(parser: argparse.ArgumentParser): + parser.add_argument("--skill_model_id_or_path", type=str, default=None, help="Model ID of path of skill models.") + return parser + def add_general_config(parser: argparse.ArgumentParser): parser = add_dataset_base_config(parser) parser = add_model_config(parser) @@ -67,4 +71,5 @@ def add_general_config(parser: argparse.ArgumentParser): parser = add_output_config(parser) parser = add_lora_config(parser) parser = add_gradient_config(parser) + parser = add_skill_model_config(parser) return parser diff --git a/diffsynth/diffusion/skills.py b/diffsynth/diffusion/skills.py new file mode 100644 index 000000000..ced2fe49b --- /dev/null +++ b/diffsynth/diffusion/skills.py @@ -0,0 +1,137 @@ +import torch, os, importlib, warnings, json +from typing import Dict, List, Tuple, Union +from ..core import ModelConfig, load_model +from ..core.device.npu_compatible_device import get_device_type + + +SkillCache = Dict[str, Tuple[torch.Tensor, torch.Tensor]] + + +class SkillModel(torch.nn.Module): + def __init__(self): + super().__init__() + + @torch.no_grad() + def process_inputs(self, pipe=None, **kwargs): + return {} + + def forward(self, **kwargs) -> SkillCache: + raise NotImplementedError() + + +class MultiSkillModel(SkillModel): + def __init__(self, models: List[SkillModel]): + super().__init__() + if not isinstance(models, list): + models = [models] 
+ self.models = torch.nn.ModuleList(models) + + def merge(self, kv_cache_list: List[SkillCache]) -> SkillCache: + names = {} + for kv_cache in kv_cache_list: + for name in kv_cache: + names[name] = None + kv_cache_merged = {} + for name in names: + kv_list = [kv_cache.get(name) for kv_cache in kv_cache_list] + kv_list = [kv for kv in kv_list if kv is not None] + if len(kv_list) > 0: + k = torch.concat([kv[0] for kv in kv_list], dim=1) + v = torch.concat([kv[1] for kv in kv_list], dim=1) + kv_cache_merged[name] = (k, v) + return kv_cache_merged + + @torch.no_grad() + def process_inputs(self, pipe=None, inputs: List[Dict] = None, **kwargs): + return [(i["model_id"], self.models[i["model_id"]].process_inputs(pipe=pipe, **i)) for i in inputs] + + def forward(self, inputs: List[Tuple[int, Dict]], **kwargs) -> SkillCache: + kv_cache_list = [] + for model_id, model_inputs in inputs: + kv_cache = self.models[model_id](**model_inputs) + kv_cache_list.append(kv_cache) + return self.merge(kv_cache_list) + + +def load_skill_model(path, torch_dtype=torch.bfloat16, device="cuda", verbose=1): + spec = importlib.util.spec_from_file_location("skill_model", os.path.join(path, "model.py")) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + model = load_model( + model_class=getattr(module, 'SKILL_MODEL'), + config=getattr(module, 'SKILL_MODEL_CONFIG') if hasattr(module, 'SKILL_MODEL_CONFIG') else None, + path=os.path.join(path, getattr(module, 'SKILL_MODEL_PATH')), + torch_dtype=torch_dtype, + device=device, + ) + if verbose > 0: + metadata = { + "model_architecture": getattr(module, 'SKILL_MODEL').__name__, + "code_path": os.path.join(path, "model.py"), + "weight_path": os.path.join(path, getattr(module, 'SKILL_MODEL_PATH')), + } + print(f"Skill model loaded: {json.dumps(metadata, indent=4)}") + return model + + +def load_skill_data_processor(path): + spec = importlib.util.spec_from_file_location("skill_model", os.path.join(path, "model.py")) + 
module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + if hasattr(module, 'SKILL_DATA_PROCESSOR'): + processor = getattr(module, 'SKILL_DATA_PROCESSOR') + return processor + else: + return None + + +class SkillsPipeline(MultiSkillModel): + def __init__(self, models: List[SkillModel]): + super().__init__(models) + + @staticmethod + def check_vram_config(model_config: ModelConfig): + params = [ + model_config.offload_device, model_config.offload_dtype, + model_config.onload_device, model_config.onload_dtype, + model_config.preparing_device, model_config.preparing_dtype, + model_config.computation_device, model_config.computation_dtype, + ] + for param in params: + if param is not None: + warnings.warn("SkillsPipeline doesn't support VRAM management. VRAM config will be ignored.") + + @staticmethod + def from_pretrained( + torch_dtype: torch.dtype = torch.bfloat16, + device: Union[str, torch.device] = get_device_type(), + model_configs: list[ModelConfig] = [], + ): + models = [] + for model_config in model_configs: + SkillsPipeline.check_vram_config(model_config) + model_config.download_if_necessary() + model = load_skill_model(model_config.path, torch_dtype=torch_dtype, device=device) + models.append(model) + pipe = SkillsPipeline(models) + return pipe + + def call_single_side(self, pipe = None, inputs: List[Dict] = None): + inputs = self.process_inputs(pipe=pipe, inputs=inputs) + skill_cache = self.forward(inputs) + return skill_cache + + @torch.no_grad() + def __call__( + self, + pipe = None, + inputs: List[Dict] = None, + positive_inputs: List[Dict] = None, + negative_inputs: List[Dict] = None, + ): + shared_cache = self.call_single_side(pipe=pipe, inputs=inputs or []) + positive_cache = self.call_single_side(pipe=pipe, inputs=positive_inputs or []) + negative_cache = self.call_single_side(pipe=pipe, inputs=negative_inputs or []) + positive_cache = self.merge([positive_cache, shared_cache]) + negative_cache = self.merge([negative_cache, 
shared_cache]) + return {"skill_cache": positive_cache, "negative_skill_cache": negative_cache} diff --git a/diffsynth/diffusion/training_module.py b/diffsynth/diffusion/training_module.py index 0a0011804..37c90d050 100644 --- a/diffsynth/diffusion/training_module.py +++ b/diffsynth/diffusion/training_module.py @@ -6,6 +6,7 @@ class GeneralUnit_RemoveCache(PipelineUnit): + # Only used for training def __init__(self, required_params=tuple(), force_remove_params_shared=tuple(), force_remove_params_posi=tuple(), force_remove_params_nega=tuple()): super().__init__(take_over=True) self.required_params = required_params @@ -27,6 +28,40 @@ def process(self, pipe, inputs_shared, inputs_posi, inputs_nega): return inputs_shared, inputs_posi, inputs_nega +class GeneralUnit_SkillProcessInputs(PipelineUnit): + # Only used for training + def __init__(self, data_processor): + super().__init__( + input_params=("skill_inputs",), + output_params=("skill_inputs",), + ) + self.data_processor = data_processor + + def process(self, pipe, skill_inputs): + if not hasattr(pipe, "skill_model"): + return {} + if self.data_processor is not None: + skill_inputs = self.data_processor(**skill_inputs) + skill_inputs = pipe.skill_model.process_inputs(pipe=pipe, **skill_inputs) + return {"skill_inputs": skill_inputs} + + +class GeneralUnit_SkillForward(PipelineUnit): + # Only used for training + def __init__(self): + super().__init__( + input_params=("skill_inputs",), + output_params=("skill_cache",), + onload_model_names=("skill_model",) + ) + + def process(self, pipe, skill_inputs): + if not hasattr(pipe, "skill_model"): + return {} + skill_cache = pipe.skill_model.forward(**skill_inputs) + return {"skill_cache": skill_cache} + + class DiffusionTrainingModule(torch.nn.Module): def __init__(self): super().__init__() @@ -209,6 +244,16 @@ def parse_lora_target_modules(self, model, lora_target_modules): else: lora_target_modules = lora_target_modules.split(",") return lora_target_modules + + + def 
load_training_skill_model(self, pipe, path_or_model_id): + if path_or_model_id is None: + return pipe + model_config = self.parse_path_or_model_id(path_or_model_id) + pipe.load_training_skill_model(model_config) + pipe.units.append(GeneralUnit_SkillProcessInputs(pipe.skill_data_processor)) + pipe.units.append(GeneralUnit_SkillForward()) + return pipe def switch_pipe_to_training_mode( diff --git a/diffsynth/models/flux2_dit.py b/diffsynth/models/flux2_dit.py index a1bd02a61..8be717ffd 100644 --- a/diffsynth/models/flux2_dit.py +++ b/diffsynth/models/flux2_dit.py @@ -364,78 +364,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return x -class Flux2AttnProcessor: - _attention_backend = None - _parallel_config = None - - def __init__(self): - if not hasattr(F, "scaled_dot_product_attention"): - raise ImportError(f"{self.__class__.__name__} requires PyTorch 2.0. Please upgrade your pytorch version.") - - def __call__( - self, - attn: "Flux2Attention", - hidden_states: torch.Tensor, - encoder_hidden_states: torch.Tensor = None, - attention_mask: Optional[torch.Tensor] = None, - image_rotary_emb: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - query, key, value, encoder_query, encoder_key, encoder_value = _get_qkv_projections( - attn, hidden_states, encoder_hidden_states - ) - - query = query.unflatten(-1, (attn.heads, -1)) - key = key.unflatten(-1, (attn.heads, -1)) - value = value.unflatten(-1, (attn.heads, -1)) - - query = attn.norm_q(query) - key = attn.norm_k(key) - - if attn.added_kv_proj_dim is not None: - encoder_query = encoder_query.unflatten(-1, (attn.heads, -1)) - encoder_key = encoder_key.unflatten(-1, (attn.heads, -1)) - encoder_value = encoder_value.unflatten(-1, (attn.heads, -1)) - - encoder_query = attn.norm_added_q(encoder_query) - encoder_key = attn.norm_added_k(encoder_key) - - query = torch.cat([encoder_query, query], dim=1) - key = torch.cat([encoder_key, key], dim=1) - value = torch.cat([encoder_value, value], dim=1) - - if 
image_rotary_emb is not None: - query = apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) - key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) - - query, key, value = query.to(hidden_states.dtype), key.to(hidden_states.dtype), value.to(hidden_states.dtype) - hidden_states = attention_forward( - query, - key, - value, - q_pattern="b s n d", k_pattern="b s n d", v_pattern="b s n d", out_pattern="b s n d", - ) - hidden_states = hidden_states.flatten(2, 3) - hidden_states = hidden_states.to(query.dtype) - - if encoder_hidden_states is not None: - encoder_hidden_states, hidden_states = hidden_states.split_with_sizes( - [encoder_hidden_states.shape[1], hidden_states.shape[1] - encoder_hidden_states.shape[1]], dim=1 - ) - encoder_hidden_states = attn.to_add_out(encoder_hidden_states) - - hidden_states = attn.to_out[0](hidden_states) - hidden_states = attn.to_out[1](hidden_states) - - if encoder_hidden_states is not None: - return hidden_states, encoder_hidden_states - else: - return hidden_states - - class Flux2Attention(torch.nn.Module): - _default_processor_cls = Flux2AttnProcessor - _available_processors = [Flux2AttnProcessor] - def __init__( self, query_dim: int, @@ -449,7 +378,6 @@ def __init__( eps: float = 1e-5, out_dim: int = None, elementwise_affine: bool = True, - processor=None, ): super().__init__() @@ -485,59 +413,45 @@ def __init__( self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) self.to_add_out = torch.nn.Linear(self.inner_dim, query_dim, bias=out_bias) - if processor is None: - processor = self._default_processor_cls() - self.processor = processor - def forward( self, hidden_states: torch.Tensor, encoder_hidden_states: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, image_rotary_emb: Optional[torch.Tensor] = None, + kv_cache = None, **kwargs, ) -> torch.Tensor: - attn_parameters = set(inspect.signature(self.processor.__call__).parameters.keys()) - kwargs = {k: w for 
k, w in kwargs.items() if k in attn_parameters} - return self.processor(self, hidden_states, encoder_hidden_states, attention_mask, image_rotary_emb, **kwargs) - - -class Flux2ParallelSelfAttnProcessor: - _attention_backend = None - _parallel_config = None + query, key, value, encoder_query, encoder_key, encoder_value = _get_qkv_projections( + self, hidden_states, encoder_hidden_states + ) - def __init__(self): - if not hasattr(F, "scaled_dot_product_attention"): - raise ImportError(f"{self.__class__.__name__} requires PyTorch 2.0. Please upgrade your pytorch version.") + query = query.unflatten(-1, (self.heads, -1)) + key = key.unflatten(-1, (self.heads, -1)) + value = value.unflatten(-1, (self.heads, -1)) - def __call__( - self, - attn: "Flux2ParallelSelfAttention", - hidden_states: torch.Tensor, - attention_mask: Optional[torch.Tensor] = None, - image_rotary_emb: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - # Parallel in (QKV + MLP in) projection - hidden_states = attn.to_qkv_mlp_proj(hidden_states) - qkv, mlp_hidden_states = torch.split( - hidden_states, [3 * attn.inner_dim, attn.mlp_hidden_dim * attn.mlp_mult_factor], dim=-1 - ) + query = self.norm_q(query) + key = self.norm_k(key) - # Handle the attention logic - query, key, value = qkv.chunk(3, dim=-1) + if self.added_kv_proj_dim is not None: + encoder_query = encoder_query.unflatten(-1, (self.heads, -1)) + encoder_key = encoder_key.unflatten(-1, (self.heads, -1)) + encoder_value = encoder_value.unflatten(-1, (self.heads, -1)) - query = query.unflatten(-1, (attn.heads, -1)) - key = key.unflatten(-1, (attn.heads, -1)) - value = value.unflatten(-1, (attn.heads, -1)) + encoder_query = self.norm_added_q(encoder_query) + encoder_key = self.norm_added_k(encoder_key) - query = attn.norm_q(query) - key = attn.norm_k(key) + query = torch.cat([encoder_query, query], dim=1) + key = torch.cat([encoder_key, key], dim=1) + value = torch.cat([encoder_value, value], dim=1) if image_rotary_emb is not None: query = 
apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) - query, key, value = query.to(hidden_states.dtype), key.to(hidden_states.dtype), value.to(hidden_states.dtype) + if kv_cache is not None: + key = torch.concat([key, kv_cache[0]], dim=1) + value = torch.concat([value, kv_cache[1]], dim=1) hidden_states = attention_forward( query, key, @@ -547,30 +461,22 @@ def __call__( hidden_states = hidden_states.flatten(2, 3) hidden_states = hidden_states.to(query.dtype) - # Handle the feedforward (FF) logic - mlp_hidden_states = attn.mlp_act_fn(mlp_hidden_states) + if encoder_hidden_states is not None: + encoder_hidden_states, hidden_states = hidden_states.split_with_sizes( + [encoder_hidden_states.shape[1], hidden_states.shape[1] - encoder_hidden_states.shape[1]], dim=1 + ) + encoder_hidden_states = self.to_add_out(encoder_hidden_states) - # Concatenate and parallel output projection - hidden_states = torch.cat([hidden_states, mlp_hidden_states], dim=-1) - hidden_states = attn.to_out(hidden_states) + hidden_states = self.to_out[0](hidden_states) + hidden_states = self.to_out[1](hidden_states) - return hidden_states + if encoder_hidden_states is not None: + return hidden_states, encoder_hidden_states + else: + return hidden_states class Flux2ParallelSelfAttention(torch.nn.Module): - """ - Flux 2 parallel self-attention for the Flux 2 single-stream transformer blocks. - - This implements a parallel transformer block, where the attention QKV projections are fused to the feedforward (FF) - input projections, and the attention output projections are fused to the FF output projections. See the [ViT-22B - paper](https://arxiv.org/abs/2302.05442) for a visual depiction of this type of transformer block. 
- """ - - _default_processor_cls = Flux2ParallelSelfAttnProcessor - _available_processors = [Flux2ParallelSelfAttnProcessor] - # Does not support QKV fusion as the QKV projections are always fused - _supports_qkv_fusion = False - def __init__( self, query_dim: int, @@ -614,20 +520,54 @@ def __init__( # Fused attention output projection + MLP output projection self.to_out = torch.nn.Linear(self.inner_dim + self.mlp_hidden_dim, self.out_dim, bias=out_bias) - if processor is None: - processor = self._default_processor_cls() - self.processor = processor - def forward( self, hidden_states: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, image_rotary_emb: Optional[torch.Tensor] = None, + kv_cache = None, **kwargs, ) -> torch.Tensor: - attn_parameters = set(inspect.signature(self.processor.__call__).parameters.keys()) - kwargs = {k: w for k, w in kwargs.items() if k in attn_parameters} - return self.processor(self, hidden_states, attention_mask, image_rotary_emb, **kwargs) + # Parallel in (QKV + MLP in) projection + hidden_states = self.to_qkv_mlp_proj(hidden_states) + qkv, mlp_hidden_states = torch.split( + hidden_states, [3 * self.inner_dim, self.mlp_hidden_dim * self.mlp_mult_factor], dim=-1 + ) + + # Handle the attention logic + query, key, value = qkv.chunk(3, dim=-1) + + query = query.unflatten(-1, (self.heads, -1)) + key = key.unflatten(-1, (self.heads, -1)) + value = value.unflatten(-1, (self.heads, -1)) + + query = self.norm_q(query) + key = self.norm_k(key) + + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) + key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) + + if kv_cache is not None: + key = torch.concat([key, kv_cache[0]], dim=1) + value = torch.concat([value, kv_cache[1]], dim=1) + hidden_states = attention_forward( + query, + key, + value, + q_pattern="b s n d", k_pattern="b s n d", v_pattern="b s n d", out_pattern="b s n d", + ) + hidden_states = hidden_states.flatten(2, 3) + 
hidden_states = hidden_states.to(query.dtype) + + # Handle the feedforward (FF) logic + mlp_hidden_states = self.mlp_act_fn(mlp_hidden_states) + + # Concatenate and parallel output projection + hidden_states = torch.cat([hidden_states, mlp_hidden_states], dim=-1) + hidden_states = self.to_out(hidden_states) + + return hidden_states class Flux2SingleTransformerBlock(nn.Module): @@ -657,7 +597,6 @@ def __init__( eps=eps, mlp_ratio=mlp_ratio, mlp_mult_factor=2, - processor=Flux2ParallelSelfAttnProcessor(), ) def forward( @@ -669,6 +608,7 @@ def forward( joint_attention_kwargs: Optional[Dict[str, Any]] = None, split_hidden_states: bool = False, text_seq_len: Optional[int] = None, + kv_cache = None, ) -> Tuple[torch.Tensor, torch.Tensor]: # If encoder_hidden_states is None, hidden_states is assumed to have encoder_hidden_states already # concatenated @@ -685,6 +625,7 @@ def forward( attn_output = self.attn( hidden_states=norm_hidden_states, image_rotary_emb=image_rotary_emb, + kv_cache=kv_cache, **joint_attention_kwargs, ) @@ -725,7 +666,6 @@ def __init__( added_proj_bias=bias, out_bias=bias, eps=eps, - processor=Flux2AttnProcessor(), ) self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=eps) @@ -742,6 +682,7 @@ def forward( temb_mod_params_txt: Tuple[Tuple[torch.Tensor, torch.Tensor, torch.Tensor], ...], image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, joint_attention_kwargs: Optional[Dict[str, Any]] = None, + kv_cache = None, ) -> Tuple[torch.Tensor, torch.Tensor]: joint_attention_kwargs = joint_attention_kwargs or {} @@ -762,6 +703,7 @@ def forward( hidden_states=norm_hidden_states, encoder_hidden_states=norm_encoder_hidden_states, image_rotary_emb=image_rotary_emb, + kv_cache=kv_cache, **joint_attention_kwargs, ) @@ -969,6 +911,7 @@ def forward( txt_ids: torch.Tensor = None, guidance: torch.Tensor = None, joint_attention_kwargs: Optional[Dict[str, Any]] = None, + kv_cache = None, use_gradient_checkpointing=False, 
use_gradient_checkpointing_offload=False, ): @@ -1013,7 +956,7 @@ def forward( ) # 4. Double Stream Transformer Blocks - for index_block, block in enumerate(self.transformer_blocks): + for block_id, block in enumerate(self.transformer_blocks): encoder_hidden_states, hidden_states = gradient_checkpoint_forward( block, use_gradient_checkpointing=use_gradient_checkpointing, @@ -1024,12 +967,13 @@ def forward( temb_mod_params_txt=double_stream_mod_txt, image_rotary_emb=concat_rotary_emb, joint_attention_kwargs=joint_attention_kwargs, + kv_cache=None if kv_cache is None else kv_cache.get(f"double_{block_id}"), ) # Concatenate text and image streams for single-block inference hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) # 5. Single Stream Transformer Blocks - for index_block, block in enumerate(self.single_transformer_blocks): + for block_id, block in enumerate(self.single_transformer_blocks): hidden_states = gradient_checkpoint_forward( block, use_gradient_checkpointing=use_gradient_checkpointing, @@ -1039,6 +983,7 @@ def forward( temb_mod_params=single_stream_mod, image_rotary_emb=concat_rotary_emb, joint_attention_kwargs=joint_attention_kwargs, + kv_cache=None if kv_cache is None else kv_cache.get(f"single_{block_id}"), ) # Remove text tokens from concatenated stream hidden_states = hidden_states[:, num_txt_tokens:, ...] 
diff --git a/diffsynth/pipelines/flux2_image.py b/diffsynth/pipelines/flux2_image.py index 34f4d2746..9dda9bf51 100644 --- a/diffsynth/pipelines/flux2_image.py +++ b/diffsynth/pipelines/flux2_image.py @@ -93,6 +93,9 @@ def __call__( initial_noise: torch.Tensor = None, # Steps num_inference_steps: int = 30, + # KV Cache + skill_cache = None, + negative_skill_cache = None, # Progress bar progress_bar_cmd = tqdm, ): @@ -101,9 +104,11 @@ def __call__( # Parameters inputs_posi = { "prompt": prompt, + "skill_cache": skill_cache, } inputs_nega = { "negative_prompt": negative_prompt, + "skill_cache": negative_skill_cache, } inputs_shared = { "cfg_scale": cfg_scale, "embedded_guidance": embedded_guidance, @@ -570,6 +575,7 @@ def model_fn_flux2( image_ids=None, edit_latents=None, edit_image_ids=None, + skill_cache=None, use_gradient_checkpointing=False, use_gradient_checkpointing_offload=False, **kwargs, @@ -587,6 +593,7 @@ def model_fn_flux2( encoder_hidden_states=prompt_embeds, txt_ids=text_ids, img_ids=image_ids, + kv_cache=skill_cache, use_gradient_checkpointing=use_gradient_checkpointing, use_gradient_checkpointing_offload=use_gradient_checkpointing_offload, ) diff --git a/examples/flux2/model_inference/FLUX.2-klein-base-4B-skills.py b/examples/flux2/model_inference/FLUX.2-klein-base-4B-skills.py new file mode 100644 index 000000000..fcf799261 --- /dev/null +++ b/examples/flux2/model_inference/FLUX.2-klein-base-4B-skills.py @@ -0,0 +1,56 @@ +from diffsynth.diffusion.skills import SkillsPipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from PIL import Image + + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + 
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +skills = SkillsPipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Skills-ControlNet"), + ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Skills-Brightness"), + ], +) +skill_cache = skills( + positive_inputs = [ + { + "model_id": 0, + "image": Image.open("xxx.jpg"), + "prompt": "一位长发少女,四周环绕着魔法粒子", + }, + { + "model_id": 1, + "scale": 0.6, + }, + ], + negative_inputs = [ + { + "model_id": 0, + "image": Image.open("xxx.jpg"), + "prompt": "一位长发少女,四周环绕着魔法粒子", + }, + { + "model_id": 1, + "scale": 0.5, + }, + ], + pipe=pipe, +) +image = pipe( + prompt="一位长发少女,四周环绕着魔法粒子", + seed=0, rand_device="cuda", num_inference_steps=50, cfg_scale=4, + height=1024, width=1024, + **skill_cache, +) +image.save("image.jpg") diff --git a/examples/flux2/model_training/full/FLUX.2-klein-base-4B-skills.sh b/examples/flux2/model_training/full/FLUX.2-klein-base-4B-skills.sh new file mode 100644 index 000000000..d56634b62 --- /dev/null +++ b/examples/flux2/model_training/full/FLUX.2-klein-base-4B-skills.sh @@ -0,0 +1,16 @@ +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path /mnt/nas1/duanzhongjie.dzj/dataset/ImagePulseV2 \ + --dataset_metadata_path /mnt/nas1/duanzhongjie.dzj/dataset/ImagePulseV2/metadata_example_ti2ti.jsonl \ + --extra_inputs "skill_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 1 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --skill_model_id_or_path "models/base" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" 
\ + --learning_rate 1e-4 \ + --num_epochs 999 \ + --remove_prefix_in_ckpt "pipe.skill_model." \ + --output_path "./models/train/FLUX.2-klein-base-4B-skills_full" \ + --trainable_models "skill_model" \ + --use_gradient_checkpointing \ + --save_steps 200 diff --git a/examples/flux2/model_training/scripts/convert_base_model_to_skill_model.py b/examples/flux2/model_training/scripts/convert_base_model_to_skill_model.py new file mode 100644 index 000000000..21fab7f62 --- /dev/null +++ b/examples/flux2/model_training/scripts/convert_base_model_to_skill_model.py @@ -0,0 +1,60 @@ +from diffsynth import load_state_dict +from safetensors.torch import save_file +import torch + + +def Flux2DiTStateDictConverter(state_dict): + rename_dict = { + "time_guidance_embed.timestep_embedder.linear_1.weight": "time_guidance_embed.timestep_embedder.0.weight", + "time_guidance_embed.timestep_embedder.linear_2.weight": "time_guidance_embed.timestep_embedder.2.weight", + "x_embedder.weight": "img_embedder.weight", + "context_embedder.weight": "txt_embedder.weight", + } + state_dict_ = {} + for name in state_dict: + if name in rename_dict: + state_dict_[rename_dict[name]] = state_dict[name] + elif name.startswith("transformer_blocks"): + if name.endswith("attn.to_q.weight"): + state_dict_[name.replace("to_q", "img_to_qkv").replace(".attn.", ".")] = torch.concat([ + state_dict[name.replace("to_q", "to_q")], + state_dict[name.replace("to_q", "to_k")], + state_dict[name.replace("to_q", "to_v")], + ], dim=0) + elif name.endswith("attn.to_k.weight") or name.endswith("attn.to_v.weight"): + continue + elif name.endswith("attn.to_out.0.weight"): + state_dict_[name.replace("attn.to_out.0.weight", "img_to_out.weight")] = state_dict[name] + elif name.endswith("attn.norm_q.weight"): + state_dict_[name.replace("attn.norm_q.weight", "img_norm_q.weight")] = state_dict[name] + elif name.endswith("attn.norm_k.weight"): + state_dict_[name.replace("attn.norm_k.weight", "img_norm_k.weight")] = state_dict[name] + 
elif name.endswith("attn.norm_added_q.weight"): + state_dict_[name.replace("attn.norm_added_q.weight", "txt_norm_q.weight")] = state_dict[name] + elif name.endswith("attn.norm_added_k.weight"): + state_dict_[name.replace("attn.norm_added_k.weight", "txt_norm_k.weight")] = state_dict[name] + elif name.endswith("attn.to_add_out.weight"): + state_dict_[name.replace("attn.to_add_out.weight", "txt_to_out.weight")] = state_dict[name] + elif name.endswith("attn.add_q_proj.weight"): + state_dict_[name.replace("add_q_proj", "txt_to_qkv").replace(".attn.", ".")] = torch.concat([ + state_dict[name.replace("add_q_proj", "add_q_proj")], + state_dict[name.replace("add_q_proj", "add_k_proj")], + state_dict[name.replace("add_q_proj", "add_v_proj")], + ], dim=0) + elif ".ff." in name: + state_dict_[name.replace(".ff.", ".img_ff.")] = state_dict[name] + elif ".ff_context." in name: + state_dict_[name.replace(".ff_context.", ".txt_ff.")] = state_dict[name] + elif name.endswith("attn.add_k_proj.weight") or name.endswith("attn.add_v_proj.weight"): + continue + else: + state_dict_[name] = state_dict[name] + elif name.startswith("single_transformer_blocks"): + state_dict_[name.replace(".attn.", ".")] = state_dict[name] + else: + state_dict_[name] = state_dict[name] + return state_dict_ + + +state_dict = load_state_dict("xxx.safetensors") +save_file(state_dict, "yyy.safetensors") diff --git a/examples/flux2/model_training/train.py b/examples/flux2/model_training/train.py index 6101687db..7a152672a 100644 --- a/examples/flux2/model_training/train.py +++ b/examples/flux2/model_training/train.py @@ -18,6 +18,7 @@ def __init__( extra_inputs=None, fp8_models=None, offload_models=None, + skill_model_id_or_path=None, device="cpu", task="sft", ): @@ -26,6 +27,7 @@ def __init__( model_configs = self.parse_model_configs(model_paths, model_id_with_origin_paths, fp8_models=fp8_models, offload_models=offload_models, device=device) tokenizer_config = self.parse_path_or_model_id(tokenizer_path, 
default_value=ModelConfig(model_id="black-forest-labs/FLUX.2-dev", origin_file_pattern="tokenizer/")) self.pipe = Flux2ImagePipeline.from_pretrained(torch_dtype=torch.bfloat16, device=device, model_configs=model_configs, tokenizer_config=tokenizer_config) + self.pipe = self.load_training_skill_model(self.pipe, skill_model_id_or_path) self.pipe = self.split_pipeline_units(task, self.pipe, trainable_models, lora_base_model) # Training mode @@ -126,6 +128,7 @@ def flux2_parser(): extra_inputs=args.extra_inputs, fp8_models=args.fp8_models, offload_models=args.offload_models, + skill_model_id_or_path=args.skill_model_id_or_path, task=args.task, device="cpu" if args.initialize_model_on_cpu else accelerator.device, ) From 9f8c352a15e4110e929cda99c79f9d3322dfa28f Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Wed, 8 Apr 2026 15:25:33 +0800 Subject: [PATCH 02/12] Diffusion Templates framework --- diffsynth/diffusion/base_pipeline.py | 21 +- diffsynth/diffusion/parsers.py | 6 +- diffsynth/diffusion/skills.py | 137 ---------- diffsynth/diffusion/template.py | 176 ++++++++++++ diffsynth/diffusion/training_module.py | 49 ++-- diffsynth/pipelines/flux2_image.py | 43 ++- .../FLUX.2-klein-base-4B-skills.py | 56 ---- .../model_inference/Template-KleinBase4B.py | 256 ++++++++++++++++++ ...e-4B-skills.sh => Template-KleinBase4B.sh} | 17 +- examples/flux2/model_training/train.py | 6 +- 10 files changed, 526 insertions(+), 241 deletions(-) delete mode 100644 diffsynth/diffusion/skills.py create mode 100644 diffsynth/diffusion/template.py delete mode 100644 examples/flux2/model_inference/FLUX.2-klein-base-4B-skills.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B.py rename examples/flux2/model_training/full/{FLUX.2-klein-base-4B-skills.sh => Template-KleinBase4B.sh} (53%) diff --git a/diffsynth/diffusion/base_pipeline.py b/diffsynth/diffusion/base_pipeline.py index 52f1f02b4..f7dc5abe3 100644 --- a/diffsynth/diffusion/base_pipeline.py +++ 
b/diffsynth/diffusion/base_pipeline.py @@ -9,7 +9,7 @@ from ..models.model_loader import ModelPool from ..utils.controlnet import ControlNetInput from ..core.device import get_device_name, IS_NPU_AVAILABLE -from .skills import load_skill_model, load_skill_data_processor +from .template import load_template_model, load_template_data_processor class PipelineUnit: @@ -320,14 +320,21 @@ def check_vram_management_state(self): def cfg_guided_model_fn(self, model_fn, cfg_scale, inputs_shared, inputs_posi, inputs_nega, **inputs_others): + # Positive side forward if inputs_shared.get("positive_only_lora", None) is not None: - self.clear_lora(verbose=0) self.load_lora(self.dit, state_dict=inputs_shared["positive_only_lora"], verbose=0) noise_pred_posi = model_fn(**inputs_posi, **inputs_shared, **inputs_others) + if inputs_shared.get("positive_only_lora", None) is not None: + self.clear_lora(verbose=0) + if cfg_scale != 1.0: - if inputs_shared.get("positive_only_lora", None) is not None: - self.clear_lora(verbose=0) + # Negative side forward + if inputs_shared.get("negative_only_lora", None) is not None: + self.load_lora(self.dit, state_dict=inputs_shared["negative_only_lora"], verbose=0) noise_pred_nega = model_fn(**inputs_nega, **inputs_shared, **inputs_others) + if inputs_shared.get("negative_only_lora", None) is not None: + self.clear_lora(verbose=0) + if isinstance(noise_pred_posi, tuple): # Separately handling different output types of latents, eg. video and audio latents. 
noise_pred = tuple( @@ -341,11 +348,11 @@ def cfg_guided_model_fn(self, model_fn, cfg_scale, inputs_shared, inputs_posi, i return noise_pred - def load_training_skill_model(self, model_config: ModelConfig = None): + def load_training_template_model(self, model_config: ModelConfig = None): if model_config is not None: model_config.download_if_necessary() - self.skill_model = load_skill_model(model_config.path, torch_dtype=self.torch_dtype, device=self.device) - self.skill_data_processor = load_skill_data_processor(model_config.path)() + self.template_model = load_template_model(model_config.path, torch_dtype=self.torch_dtype, device=self.device) + self.template_data_processor = load_template_data_processor(model_config.path)() diff --git a/diffsynth/diffusion/parsers.py b/diffsynth/diffusion/parsers.py index 9dc90e82c..3bcbe4ba3 100644 --- a/diffsynth/diffusion/parsers.py +++ b/diffsynth/diffusion/parsers.py @@ -60,8 +60,8 @@ def add_gradient_config(parser: argparse.ArgumentParser): parser.add_argument("--gradient_accumulation_steps", type=int, default=1, help="Gradient accumulation steps.") return parser -def add_skill_model_config(parser: argparse.ArgumentParser): - parser.add_argument("--skill_model_id_or_path", type=str, default=None, help="Model ID of path of skill models.") +def add_template_model_config(parser: argparse.ArgumentParser): + parser.add_argument("--template_model_id_or_path", type=str, default=None, help="Model ID or path of template models.") return parser def add_general_config(parser: argparse.ArgumentParser): @@ -71,5 +71,5 @@ def add_general_config(parser: argparse.ArgumentParser): parser = add_output_config(parser) parser = add_lora_config(parser) parser = add_gradient_config(parser) - parser = add_skill_model_config(parser) + parser = add_template_model_config(parser) return parser diff --git a/diffsynth/diffusion/skills.py b/diffsynth/diffusion/skills.py deleted file mode 100644 index ced2fe49b..000000000 ---
a/diffsynth/diffusion/skills.py +++ /dev/null @@ -1,137 +0,0 @@ -import torch, os, importlib, warnings, json -from typing import Dict, List, Tuple, Union -from ..core import ModelConfig, load_model -from ..core.device.npu_compatible_device import get_device_type - - -SkillCache = Dict[str, Tuple[torch.Tensor, torch.Tensor]] - - -class SkillModel(torch.nn.Module): - def __init__(self): - super().__init__() - - @torch.no_grad() - def process_inputs(self, pipe=None, **kwargs): - return {} - - def forward(self, **kwargs) -> SkillCache: - raise NotImplementedError() - - -class MultiSkillModel(SkillModel): - def __init__(self, models: List[SkillModel]): - super().__init__() - if not isinstance(models, list): - models = [models] - self.models = torch.nn.ModuleList(models) - - def merge(self, kv_cache_list: List[SkillCache]) -> SkillCache: - names = {} - for kv_cache in kv_cache_list: - for name in kv_cache: - names[name] = None - kv_cache_merged = {} - for name in names: - kv_list = [kv_cache.get(name) for kv_cache in kv_cache_list] - kv_list = [kv for kv in kv_list if kv is not None] - if len(kv_list) > 0: - k = torch.concat([kv[0] for kv in kv_list], dim=1) - v = torch.concat([kv[1] for kv in kv_list], dim=1) - kv_cache_merged[name] = (k, v) - return kv_cache_merged - - @torch.no_grad() - def process_inputs(self, pipe=None, inputs: List[Dict] = None, **kwargs): - return [(i["model_id"], self.models[i["model_id"]].process_inputs(pipe=pipe, **i)) for i in inputs] - - def forward(self, inputs: List[Tuple[int, Dict]], **kwargs) -> SkillCache: - kv_cache_list = [] - for model_id, model_inputs in inputs: - kv_cache = self.models[model_id](**model_inputs) - kv_cache_list.append(kv_cache) - return self.merge(kv_cache_list) - - -def load_skill_model(path, torch_dtype=torch.bfloat16, device="cuda", verbose=1): - spec = importlib.util.spec_from_file_location("skill_model", os.path.join(path, "model.py")) - module = importlib.util.module_from_spec(spec) - 
spec.loader.exec_module(module) - model = load_model( - model_class=getattr(module, 'SKILL_MODEL'), - config=getattr(module, 'SKILL_MODEL_CONFIG') if hasattr(module, 'SKILL_MODEL_CONFIG') else None, - path=os.path.join(path, getattr(module, 'SKILL_MODEL_PATH')), - torch_dtype=torch_dtype, - device=device, - ) - if verbose > 0: - metadata = { - "model_architecture": getattr(module, 'SKILL_MODEL').__name__, - "code_path": os.path.join(path, "model.py"), - "weight_path": os.path.join(path, getattr(module, 'SKILL_MODEL_PATH')), - } - print(f"Skill model loaded: {json.dumps(metadata, indent=4)}") - return model - - -def load_skill_data_processor(path): - spec = importlib.util.spec_from_file_location("skill_model", os.path.join(path, "model.py")) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - if hasattr(module, 'SKILL_DATA_PROCESSOR'): - processor = getattr(module, 'SKILL_DATA_PROCESSOR') - return processor - else: - return None - - -class SkillsPipeline(MultiSkillModel): - def __init__(self, models: List[SkillModel]): - super().__init__(models) - - @staticmethod - def check_vram_config(model_config: ModelConfig): - params = [ - model_config.offload_device, model_config.offload_dtype, - model_config.onload_device, model_config.onload_dtype, - model_config.preparing_device, model_config.preparing_dtype, - model_config.computation_device, model_config.computation_dtype, - ] - for param in params: - if param is not None: - warnings.warn("SkillsPipeline doesn't support VRAM management. 
VRAM config will be ignored.") - - @staticmethod - def from_pretrained( - torch_dtype: torch.dtype = torch.bfloat16, - device: Union[str, torch.device] = get_device_type(), - model_configs: list[ModelConfig] = [], - ): - models = [] - for model_config in model_configs: - SkillsPipeline.check_vram_config(model_config) - model_config.download_if_necessary() - model = load_skill_model(model_config.path, torch_dtype=torch_dtype, device=device) - models.append(model) - pipe = SkillsPipeline(models) - return pipe - - def call_single_side(self, pipe = None, inputs: List[Dict] = None): - inputs = self.process_inputs(pipe=pipe, inputs=inputs) - skill_cache = self.forward(inputs) - return skill_cache - - @torch.no_grad() - def __call__( - self, - pipe = None, - inputs: List[Dict] = None, - positive_inputs: List[Dict] = None, - negative_inputs: List[Dict] = None, - ): - shared_cache = self.call_single_side(pipe=pipe, inputs=inputs or []) - positive_cache = self.call_single_side(pipe=pipe, inputs=positive_inputs or []) - negative_cache = self.call_single_side(pipe=pipe, inputs=negative_inputs or []) - positive_cache = self.merge([positive_cache, shared_cache]) - negative_cache = self.merge([negative_cache, shared_cache]) - return {"skill_cache": positive_cache, "negative_skill_cache": negative_cache} diff --git a/diffsynth/diffusion/template.py b/diffsynth/diffusion/template.py new file mode 100644 index 000000000..6b9a53fd9 --- /dev/null +++ b/diffsynth/diffusion/template.py @@ -0,0 +1,176 @@ +import torch, os, importlib, warnings, json, inspect +from typing import Dict, List, Tuple, Union +from ..core import ModelConfig, load_model +from ..core.device.npu_compatible_device import get_device_type + + +KVCache = Dict[str, Tuple[torch.Tensor, torch.Tensor]] + + +class TemplateModel(torch.nn.Module): + def __init__(self): + super().__init__() + + @torch.no_grad() + def process_inputs(self, **kwargs): + return {} + + def forward(self, **kwargs): + raise NotImplementedError() + + 
+def check_template_model_format(model): + if not hasattr(model, "process_inputs"): + raise NotImplementedError("`process_inputs` is not implemented in the Template model.") + if "kwargs" not in inspect.signature(model.process_inputs).parameters: + raise NotImplementedError("`**kwargs` is not included in `process_inputs`.") + if not hasattr(model, "forward"): + raise NotImplementedError("`forward` is not implemented in the Template model.") + if "kwargs" not in inspect.signature(model.forward).parameters: + raise NotImplementedError("`**kwargs` is not included in `forward`.") + + +def load_template_model(path, torch_dtype=torch.bfloat16, device="cuda", verbose=1): + spec = importlib.util.spec_from_file_location("template_model", os.path.join(path, "model.py")) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + template_model_path = getattr(module, 'TEMPLATE_MODEL_PATH') if hasattr(module, 'TEMPLATE_MODEL_PATH') else None + if template_model_path is not None: + # With `TEMPLATE_MODEL_PATH`, a pretrained model will be loaded. + model = load_model( + model_class=getattr(module, 'TEMPLATE_MODEL'), + config=getattr(module, 'TEMPLATE_MODEL_CONFIG') if hasattr(module, 'TEMPLATE_MODEL_CONFIG') else None, + path=os.path.join(path, getattr(module, 'TEMPLATE_MODEL_PATH')), + torch_dtype=torch_dtype, + device=device, + ) + else: + # Without `TEMPLATE_MODEL_PATH`, a randomly initialized model or a non-model module will be loaded. 
+ model = module.TEMPLATE_MODEL() + if hasattr(model, "to"): + model = model.to(dtype=torch_dtype, device=device) + if hasattr(model, "eval"): + model = model.eval() + check_template_model_format(model) + if verbose > 0: + metadata = { + "model_architecture": getattr(module, 'TEMPLATE_MODEL').__name__, + "code_path": os.path.join(path, "model.py"), + "weight_path": template_model_path, + } + print(f"Template model loaded: {json.dumps(metadata, indent=4)}") + return model + + +def load_template_data_processor(path): + spec = importlib.util.spec_from_file_location("template_model", os.path.join(path, "model.py")) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + if hasattr(module, 'TEMPLATE_DATA_PROCESSOR'): + processor = getattr(module, 'TEMPLATE_DATA_PROCESSOR') + return processor + else: + return None + + +class TemplatePipeline(torch.nn.Module): + def __init__(self, models: List[TemplateModel]): + super().__init__() + self.models = torch.nn.ModuleList(models) + + def merge_kv_cache(self, kv_cache_list: List[KVCache]) -> KVCache: + names = {} + for kv_cache in kv_cache_list: + for name in kv_cache: + names[name] = None + kv_cache_merged = {} + for name in names: + kv_list = [kv_cache.get(name) for kv_cache in kv_cache_list] + kv_list = [kv for kv in kv_list if kv is not None] + if len(kv_list) > 0: + k = torch.concat([kv[0] for kv in kv_list], dim=1) + v = torch.concat([kv[1] for kv in kv_list], dim=1) + kv_cache_merged[name] = (k, v) + return kv_cache_merged + + def merge_template_cache(self, template_cache_list): + params = sorted(list(set(sum([list(template_cache.keys()) for template_cache in template_cache_list], [])))) + template_cache_merged = {} + for param in params: + data = [template_cache[param] for template_cache in template_cache_list if param in template_cache] + if param == "kv_cache": + data = self.merge_kv_cache(data) + elif len(data) == 1: + data = data[0] + else: + print(f"Conflict detected: `{param}` appears 
in the outputs of multiple Template models. Only the first one will be retained.") + data = data[0] + template_cache_merged[param] = data + return template_cache_merged + + @staticmethod + def check_vram_config(model_config: ModelConfig): + params = [ + model_config.offload_device, model_config.offload_dtype, + model_config.onload_device, model_config.onload_dtype, + model_config.preparing_device, model_config.preparing_dtype, + model_config.computation_device, model_config.computation_dtype, + ] + for param in params: + if param is not None: + warnings.warn("TemplatePipeline doesn't support VRAM management. VRAM config will be ignored.") + + @staticmethod + def from_pretrained( + torch_dtype: torch.dtype = torch.bfloat16, + device: Union[str, torch.device] = get_device_type(), + model_configs: list[ModelConfig] = [], + ): + models = [] + for model_config in model_configs: + TemplatePipeline.check_vram_config(model_config) + model_config.download_if_necessary() + model = load_template_model(model_config.path, torch_dtype=torch_dtype, device=device) + models.append(model) + pipe = TemplatePipeline(models) + return pipe + + @torch.no_grad() + def process_inputs(self, inputs: List[Dict], pipe=None, **kwargs): + return [(i.get("model_id", 0), self.models[i.get("model_id", 0)].process_inputs(pipe=pipe, **i)) for i in inputs] + + def forward(self, inputs: List[Tuple[int, Dict]], pipe=None, **kwargs): + template_cache = [] + for model_id, model_inputs in inputs: + kv_cache = self.models[model_id](pipe=pipe, **model_inputs) + template_cache.append(kv_cache) + return template_cache + + def call_single_side(self, pipe=None, inputs: List[Dict] = None): + inputs = self.process_inputs(pipe=pipe, inputs=inputs) + template_cache = self.forward(pipe=pipe, inputs=inputs) + template_cache = self.merge_template_cache(template_cache) + return template_cache + + @torch.no_grad() + def __call__( + self, + pipe=None, + template_inputs: List[Dict] = None, + negative_template_inputs: 
List[Dict] = None, + **kwargs, + ): + template_cache = self.call_single_side(pipe=pipe, inputs=template_inputs or []) + negative_template_cache = self.call_single_side(pipe=pipe, inputs=negative_template_inputs or []) + required_params = list(inspect.signature(pipe.__call__).parameters.keys()) + for param in template_cache: + if param in required_params: + kwargs[param] = template_cache[param] + else: + print(f"`{param}` is not included in the inputs of `{pipe.__class__.__name__}`. This parameter will be ignored.") + for param in negative_template_cache: + if "negative_" + param in required_params: + kwargs["negative_" + param] = negative_template_cache[param] + else: + print(f"`{'negative_' + param}` is not included in the inputs of `{pipe.__class__.__name__}`. This parameter will be ignored.") + return pipe(**kwargs) diff --git a/diffsynth/diffusion/training_module.py b/diffsynth/diffusion/training_module.py index 37c90d050..e1d3852dd 100644 --- a/diffsynth/diffusion/training_module.py +++ b/diffsynth/diffusion/training_module.py @@ -28,38 +28,45 @@ def process(self, pipe, inputs_shared, inputs_posi, inputs_nega): return inputs_shared, inputs_posi, inputs_nega -class GeneralUnit_SkillProcessInputs(PipelineUnit): +class GeneralUnit_TemplateProcessInputs(PipelineUnit): # Only used for training def __init__(self, data_processor): super().__init__( - input_params=("skill_inputs",), - output_params=("skill_inputs",), + input_params=("template_inputs",), + output_params=("template_inputs",), ) self.data_processor = data_processor - def process(self, pipe, skill_inputs): - if not hasattr(pipe, "skill_model"): + def process(self, pipe, template_inputs): + if not hasattr(pipe, "template_model"): return {} if self.data_processor is not None: - skill_inputs = self.data_processor(**skill_inputs) - skill_inputs = pipe.skill_model.process_inputs(pipe=pipe, **skill_inputs) - return {"skill_inputs": skill_inputs} + template_inputs = self.data_processor(**template_inputs) + 
template_inputs = pipe.template_model.process_inputs(pipe=pipe, **template_inputs) + return {"template_inputs": template_inputs} -class GeneralUnit_SkillForward(PipelineUnit): +class GeneralUnit_TemplateForward(PipelineUnit): # Only used for training - def __init__(self): + def __init__(self, use_gradient_checkpointing=False, use_gradient_checkpointing_offload=False): super().__init__( - input_params=("skill_inputs",), - output_params=("skill_cache",), - onload_model_names=("skill_model",) + input_params=("template_inputs",), + output_params=("kv_cache",), + onload_model_names=("template_model",) ) + self.use_gradient_checkpointing = use_gradient_checkpointing + self.use_gradient_checkpointing_offload = use_gradient_checkpointing_offload - def process(self, pipe, skill_inputs): - if not hasattr(pipe, "skill_model"): + def process(self, pipe, template_inputs): + if not hasattr(pipe, "template_model"): return {} - skill_cache = pipe.skill_model.forward(**skill_inputs) - return {"skill_cache": skill_cache} + template_cache = pipe.template_model.forward( + **template_inputs, + pipe=pipe, + use_gradient_checkpointing=self.use_gradient_checkpointing, + use_gradient_checkpointing_offload=self.use_gradient_checkpointing_offload, + ) + return template_cache class DiffusionTrainingModule(torch.nn.Module): @@ -246,13 +253,13 @@ def parse_lora_target_modules(self, model, lora_target_modules): return lora_target_modules - def load_training_skill_model(self, pipe, path_or_model_id): + def load_training_template_model(self, pipe, path_or_model_id, use_gradient_checkpointing=False, use_gradient_checkpointing_offload=False): if path_or_model_id is None: return pipe model_config = self.parse_path_or_model_id(path_or_model_id) - pipe.load_training_skill_model(model_config) - pipe.units.append(GeneralUnit_SkillProcessInputs(pipe.skill_data_processor)) - pipe.units.append(GeneralUnit_SkillForward()) + pipe.load_training_template_model(model_config) + 
pipe.units.append(GeneralUnit_TemplateProcessInputs(pipe.template_data_processor)) + pipe.units.append(GeneralUnit_TemplateForward(use_gradient_checkpointing, use_gradient_checkpointing_offload)) return pipe diff --git a/diffsynth/pipelines/flux2_image.py b/diffsynth/pipelines/flux2_image.py index 9dda9bf51..4bec2417f 100644 --- a/diffsynth/pipelines/flux2_image.py +++ b/diffsynth/pipelines/flux2_image.py @@ -40,6 +40,7 @@ def __init__(self, device=get_device_type(), torch_dtype=torch.bfloat16): Flux2Unit_InputImageEmbedder(), Flux2Unit_EditImageEmbedder(), Flux2Unit_ImageIDs(), + Flux2Unit_Inpaint(), ] self.model_fn = model_fn_flux2 @@ -94,8 +95,15 @@ def __call__( # Steps num_inference_steps: int = 30, # KV Cache - skill_cache = None, - negative_skill_cache = None, + kv_cache = None, + negative_kv_cache = None, + # LoRA + lora = None, + negative_lora = None, + # Inpaint + inpaint_mask: Image.Image = None, + inpaint_blur_size: int = None, + inpaint_blur_sigma: float = None, # Progress bar progress_bar_cmd = tqdm, ): @@ -104,11 +112,11 @@ def __call__( # Parameters inputs_posi = { "prompt": prompt, - "skill_cache": skill_cache, + "kv_cache": kv_cache, } inputs_nega = { "negative_prompt": negative_prompt, - "skill_cache": negative_skill_cache, + "kv_cache": negative_kv_cache, } inputs_shared = { "cfg_scale": cfg_scale, "embedded_guidance": embedded_guidance, @@ -117,6 +125,9 @@ def __call__( "height": height, "width": width, "seed": seed, "rand_device": rand_device, "initial_noise": initial_noise, "num_inference_steps": num_inference_steps, + "positive_only_lora": lora, + "negative_only_lora": negative_lora, + "inpaint_mask": inpaint_mask, "inpaint_blur_size": inpaint_blur_size, "inpaint_blur_sigma": inpaint_blur_sigma, } for unit in self.units: inputs_shared, inputs_posi, inputs_nega = self.unit_runner(unit, self, inputs_shared, inputs_posi, inputs_nega) @@ -565,6 +576,26 @@ def process(self, pipe: Flux2ImagePipeline, height, width): return {"image_ids": image_ids} 
+class Flux2Unit_Inpaint(PipelineUnit): + def __init__(self): + super().__init__( + input_params=("inpaint_mask", "height", "width", "inpaint_blur_size", "inpaint_blur_sigma"), + output_params=("inpaint_mask",), + ) + + def process(self, pipe: Flux2ImagePipeline, inpaint_mask, height, width, inpaint_blur_size, inpaint_blur_sigma): + if inpaint_mask is None: + return {} + inpaint_mask = pipe.preprocess_image(inpaint_mask.convert("RGB").resize((width // 16, height // 16)), min_value=0, max_value=1) + inpaint_mask = inpaint_mask.mean(dim=1, keepdim=True) + if inpaint_blur_size is not None and inpaint_blur_sigma is not None: + from torchvision.transforms import GaussianBlur + blur = GaussianBlur(kernel_size=inpaint_blur_size * 2 + 1, sigma=inpaint_blur_sigma) + inpaint_mask = blur(inpaint_mask) + inpaint_mask = rearrange(inpaint_mask, "B C H W -> B (H W) C") + return {"inpaint_mask": inpaint_mask} + + def model_fn_flux2( dit: Flux2DiT, latents=None, @@ -575,7 +606,7 @@ def model_fn_flux2( image_ids=None, edit_latents=None, edit_image_ids=None, - skill_cache=None, + kv_cache=None, use_gradient_checkpointing=False, use_gradient_checkpointing_offload=False, **kwargs, @@ -593,7 +624,7 @@ def model_fn_flux2( encoder_hidden_states=prompt_embeds, txt_ids=text_ids, img_ids=image_ids, - kv_cache=skill_cache, + kv_cache=kv_cache, use_gradient_checkpointing=use_gradient_checkpointing, use_gradient_checkpointing_offload=use_gradient_checkpointing_offload, ) diff --git a/examples/flux2/model_inference/FLUX.2-klein-base-4B-skills.py b/examples/flux2/model_inference/FLUX.2-klein-base-4B-skills.py deleted file mode 100644 index fcf799261..000000000 --- a/examples/flux2/model_inference/FLUX.2-klein-base-4B-skills.py +++ /dev/null @@ -1,56 +0,0 @@ -from diffsynth.diffusion.skills import SkillsPipeline -from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig -import torch -from PIL import Image - - -pipe = Flux2ImagePipeline.from_pretrained( - 
torch_dtype=torch.bfloat16, - device="cuda", - model_configs=[ - ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), - ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), - ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), - ], - tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), -) -skills = SkillsPipeline.from_pretrained( - torch_dtype=torch.bfloat16, - device="cuda", - model_configs=[ - ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Skills-ControlNet"), - ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Skills-Brightness"), - ], -) -skill_cache = skills( - positive_inputs = [ - { - "model_id": 0, - "image": Image.open("xxx.jpg"), - "prompt": "一位长发少女,四周环绕着魔法粒子", - }, - { - "model_id": 1, - "scale": 0.6, - }, - ], - negative_inputs = [ - { - "model_id": 0, - "image": Image.open("xxx.jpg"), - "prompt": "一位长发少女,四周环绕着魔法粒子", - }, - { - "model_id": 1, - "scale": 0.5, - }, - ], - pipe=pipe, -) -image = pipe( - prompt="一位长发少女,四周环绕着魔法粒子", - seed=0, rand_device="cuda", num_inference_steps=50, cfg_scale=4, - height=1024, width=1024, - **skill_cache, -) -image.save("image.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B.py b/examples/flux2/model_inference/Template-KleinBase4B.py new file mode 100644 index 000000000..5b2dd931f --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B.py @@ -0,0 +1,256 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from PIL import Image +import numpy as np + +def load_template_pipeline(model_ids): + template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id=model_id) for model_id in 
model_ids], + ) + return template + +# Base Model +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +# image = pipe( +# prompt="A cat is sitting on a stone.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# ) +# image.save("image_base.jpg") + +# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Brightness"]) +# image = template( +# pipe, +# prompt="A cat is sitting on a stone.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{"scale": 0.7}], +# negative_template_inputs = [{"scale": 0.5}] +# ) +# image.save("image_Brightness_light.jpg") +# image = template( +# pipe, +# prompt="A cat is sitting on a stone.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{"scale": 0.5}], +# negative_template_inputs = [{"scale": 0.5}] +# ) +# image.save("image_Brightness_normal.jpg") +# image = template( +# pipe, +# prompt="A cat is sitting on a stone.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{"scale": 0.3}], +# negative_template_inputs = [{"scale": 0.5}] +# ) +# image.save("image_Brightness_dark.jpg") + +# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-ControlNet"]) +# image = template( +# pipe, +# prompt="A cat is sitting on a stone, bathed in bright sunshine.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{ +# "image": Image.open("data/assets/image_depth.jpg"), +# "prompt": "A cat is sitting on a stone, bathed 
in bright sunshine.", +# }], +# negative_template_inputs = [{ +# "image": Image.open("data/assets/image_depth.jpg"), +# "prompt": "", +# }], +# ) +# image.save("image_ControlNet_sunshine.jpg") +# image = template( +# pipe, +# prompt="A cat is sitting on a stone, surrounded by colorful magical particles.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{ +# "image": Image.open("data/assets/image_depth.jpg"), +# "prompt": "A cat is sitting on a stone, surrounded by colorful magical particles.", +# }], +# negative_template_inputs = [{ +# "image": Image.open("data/assets/image_depth.jpg"), +# "prompt": "", +# }], +# ) +# image.save("image_ControlNet_magic.jpg") + +# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Edit"]) +# image = template( +# pipe, +# prompt="Put a hat on this cat.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{ +# "image": Image.open("data/assets/image_reference.jpg"), +# "prompt": "Put a hat on this cat.", +# }], +# negative_template_inputs = [{ +# "image": Image.open("data/assets/image_reference.jpg"), +# "prompt": "", +# }], +# ) +# image.save("image_Edit_hat.jpg") +# image = template( +# pipe, +# prompt="Make the cat turn its head to look to the right.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{ +# "image": Image.open("data/assets/image_reference.jpg"), +# "prompt": "Make the cat turn its head to look to the right.", +# }], +# negative_template_inputs = [{ +# "image": Image.open("data/assets/image_reference.jpg"), +# "prompt": "", +# }], +# ) +# image.save("image_Edit_head.jpg") + +# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Upscaler"]) +# image = template( +# pipe, +# prompt="A cat is sitting on a stone.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{ +# "image": Image.open("data/assets/image_lowres_512.jpg"), +# "prompt": "A cat is sitting on a stone.", +# }], +# 
negative_template_inputs = [{ +# "image": Image.open("data/assets/image_lowres_512.jpg"), +# "prompt": "", +# }], +# ) +# image.save("image_Upscaler_1.png") +# image = template( +# pipe, +# prompt="A cat is sitting on a stone.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{ +# "image": Image.open("data/assets/image_lowres_100.jpg"), +# "prompt": "A cat is sitting on a stone.", +# }], +# negative_template_inputs = [{ +# "image": Image.open("data/assets/image_lowres_100.jpg"), +# "prompt": "", +# }], +# ) +# image.save("image_Upscaler_2.png") + +# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-SoftRGB"]) +# image = template( +# pipe, +# prompt="A cat is sitting on a stone.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{ +# "R": 128/255, +# "G": 128/255, +# "B": 128/255 +# }], +# ) +# image.save("image_rgb_normal.jpg") +# image = template( +# pipe, +# prompt="A cat is sitting on a stone.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{ +# "R": 208/255, +# "G": 185/255, +# "B": 138/255 +# }], +# ) +# image.save("image_rgb_warm.jpg") +# image = template( +# pipe, +# prompt="A cat is sitting on a stone.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{ +# "R": 94/255, +# "G": 163/255, +# "B": 174/255 +# }], +# ) +# image.save("image_rgb_cold.jpg") + +# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-PandaMeme"]) +# image = template( +# pipe, +# prompt="A meme with a sleepy expression.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{}], +# negative_template_inputs = [{}], +# ) +# image.save("image_PandaMeme_sleepy.jpg") +# image = template( +# pipe, +# prompt="A meme with a happy expression.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{}], +# negative_template_inputs = [{}], +# ) +# image.save("image_PandaMeme_happy.jpg") +# image = template( +# pipe, +# prompt="A meme 
with a surprised expression.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{}], +# negative_template_inputs = [{}], +# ) +# image.save("image_PandaMeme_surprised.jpg") + +# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Sharpness"]) +# image = template( +# pipe, +# prompt="A cat is sitting on a stone.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{"scale": 0.1}], +# negative_template_inputs = [{"scale": 0.5}], +# ) +# image.save("image_Sharpness_0.1.jpg") +# image = template( +# pipe, +# prompt="A cat is sitting on a stone.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{"scale": 0.8}], +# negative_template_inputs = [{"scale": 0.5}], +# ) +# image.save("image_Sharpness_0.8.jpg") + +# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Inpaint"]) +# image = template( +# pipe, +# prompt="An orange cat is sitting on a stone.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{ +# "image": Image.open("data/assets/image_reference.jpg"), +# "mask": Image.open("data/assets/image_mask_1.jpg"), +# "force_inpaint": True, +# }], +# negative_template_inputs = [{ +# "image": Image.open("data/assets/image_reference.jpg"), +# "mask": Image.open("data/assets/image_mask_1.jpg"), +# }], +# ) +# image.save("image_Inpaint_1.jpg") +# image = template( +# pipe, +# prompt="A cat wearing sunglasses is sitting on a stone.", +# seed=0, cfg_scale=4, num_inference_steps=50, +# template_inputs = [{ +# "image": Image.open("data/assets/image_reference.jpg"), +# "mask": Image.open("data/assets/image_mask_2.jpg"), +# }], +# negative_template_inputs = [{ +# "image": Image.open("data/assets/image_reference.jpg"), +# "mask": Image.open("data/assets/image_mask_2.jpg"), +# }], +# ) +# image.save("image_Inpaint_2.jpg") diff --git a/examples/flux2/model_training/full/FLUX.2-klein-base-4B-skills.sh b/examples/flux2/model_training/full/Template-KleinBase4B.sh 
similarity index 53% rename from examples/flux2/model_training/full/FLUX.2-klein-base-4B-skills.sh rename to examples/flux2/model_training/full/Template-KleinBase4B.sh index d56634b62..093f1ef79 100644 --- a/examples/flux2/model_training/full/FLUX.2-klein-base-4B-skills.sh +++ b/examples/flux2/model_training/full/Template-KleinBase4B.sh @@ -1,16 +1,17 @@ accelerate launch examples/flux2/model_training/train.py \ - --dataset_base_path /mnt/nas1/duanzhongjie.dzj/dataset/ImagePulseV2 \ - --dataset_metadata_path /mnt/nas1/duanzhongjie.dzj/dataset/ImagePulseV2/metadata_example_ti2ti.jsonl \ - --extra_inputs "skill_inputs" \ + --dataset_base_path xxx \ + --dataset_metadata_path xxx/metadata.jsonl \ + --extra_inputs "template_inputs" \ --max_pixels 1048576 \ --dataset_repeat 1 \ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ - --skill_model_id_or_path "models/base" \ + --template_model_id_or_path "xxx" \ --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ --learning_rate 1e-4 \ --num_epochs 999 \ - --remove_prefix_in_ckpt "pipe.skill_model." \ - --output_path "./models/train/FLUX.2-klein-base-4B-skills_full" \ - --trainable_models "skill_model" \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B_full" \ + --trainable_models "template_model" \ + --save_steps 1000 \ --use_gradient_checkpointing \ - --save_steps 200 + --find_unused_parameters diff --git a/examples/flux2/model_training/train.py b/examples/flux2/model_training/train.py index 7a152672a..5378da4e2 100644 --- a/examples/flux2/model_training/train.py +++ b/examples/flux2/model_training/train.py @@ -18,7 +18,7 @@ def __init__( extra_inputs=None, fp8_models=None, offload_models=None, - skill_model_id_or_path=None, + template_model_id_or_path=None, device="cpu", task="sft", ): @@ -27,7 +27,7 @@ def __init__( model_configs = self.parse_model_configs(model_paths, model_id_with_origin_paths, fp8_models=fp8_models, offload_models=offload_models, device=device) tokenizer_config = self.parse_path_or_model_id(tokenizer_path, default_value=ModelConfig(model_id="black-forest-labs/FLUX.2-dev", origin_file_pattern="tokenizer/")) self.pipe = Flux2ImagePipeline.from_pretrained(torch_dtype=torch.bfloat16, device=device, model_configs=model_configs, tokenizer_config=tokenizer_config) - self.pipe = self.load_training_skill_model(self.pipe, skill_model_id_or_path) + self.pipe = self.load_training_template_model(self.pipe, template_model_id_or_path, args.use_gradient_checkpointing, args.use_gradient_checkpointing_offload) self.pipe = self.split_pipeline_units(task, self.pipe, trainable_models, lora_base_model) # Training mode @@ -128,7 +128,7 @@ def flux2_parser(): extra_inputs=args.extra_inputs, fp8_models=args.fp8_models, offload_models=args.offload_models, - skill_model_id_or_path=args.skill_model_id_or_path, + template_model_id_or_path=args.template_model_id_or_path, task=args.task, device="cpu" if args.initialize_model_on_cpu else accelerator.device, ) From 59b4bbb62c47bfbcb0187f581bd7cf4374ff1c18 Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Wed, 15 Apr 2026 14:07:51 +0800 Subject: [PATCH 03/12] update template framework --- 
diffsynth/diffusion/base_pipeline.py | 19 +++++++- diffsynth/diffusion/loss.py | 5 ++ diffsynth/diffusion/parsers.py | 1 + diffsynth/diffusion/template.py | 66 ++++++++++++++++++-------- diffsynth/diffusion/training_module.py | 4 +- diffsynth/pipelines/flux2_image.py | 11 +++++ examples/flux2/model_training/train.py | 3 ++ 7 files changed, 85 insertions(+), 24 deletions(-) diff --git a/diffsynth/diffusion/base_pipeline.py b/diffsynth/diffusion/base_pipeline.py index f7dc5abe3..b731bc8e7 100644 --- a/diffsynth/diffusion/base_pipeline.py +++ b/diffsynth/diffusion/base_pipeline.py @@ -3,7 +3,7 @@ import numpy as np from einops import repeat, reduce from typing import Union -from ..core import AutoTorchModule, AutoWrappedLinear, load_state_dict, ModelConfig, parse_device_type +from ..core import AutoTorchModule, AutoWrappedLinear, load_state_dict, ModelConfig, parse_device_type, enable_vram_management from ..core.device.npu_compatible_device import get_device_type from ..utils.lora import GeneralLoRALoader from ..models.model_loader import ModelPool @@ -354,6 +354,23 @@ def load_training_template_model(self, model_config: ModelConfig = None): self.template_model = load_template_model(model_config.path, torch_dtype=self.torch_dtype, device=self.device) self.template_data_processor = load_template_data_processor(model_config.path)() + + def enable_lora_hot_loading(self, model: torch.nn.Module): + if hasattr(model, "vram_management_enabled") and getattr(model, "vram_management_enabled"): + return model + module_map = {torch.nn.Linear: AutoWrappedLinear} + vram_config = { + "offload_dtype": self.torch_dtype, + "offload_device": self.device, + "onload_dtype": self.torch_dtype, + "onload_device": self.device, + "preparing_dtype": self.torch_dtype, + "preparing_device": self.device, + "computation_dtype": self.torch_dtype, + "computation_device": self.device, + } + model = enable_vram_management(model, module_map, vram_config=vram_config) + return model class 
PipelineUnitGraph: diff --git a/diffsynth/diffusion/loss.py b/diffsynth/diffusion/loss.py index 10ad3a0d2..ee3e9883f 100644 --- a/diffsynth/diffusion/loss.py +++ b/diffsynth/diffusion/loss.py @@ -3,6 +3,11 @@ def FlowMatchSFTLoss(pipe: BasePipeline, **inputs): + if "lora" in inputs: + # Image-to-LoRA models need to load lora here. + pipe.clear_lora(verbose=0) + pipe.load_lora(pipe.dit, state_dict=inputs["lora"], hotload=True, verbose=0) + max_timestep_boundary = int(inputs.get("max_timestep_boundary", 1) * len(pipe.scheduler.timesteps)) min_timestep_boundary = int(inputs.get("min_timestep_boundary", 0) * len(pipe.scheduler.timesteps)) diff --git a/diffsynth/diffusion/parsers.py b/diffsynth/diffusion/parsers.py index 3bcbe4ba3..cf7246072 100644 --- a/diffsynth/diffusion/parsers.py +++ b/diffsynth/diffusion/parsers.py @@ -62,6 +62,7 @@ def add_gradient_config(parser: argparse.ArgumentParser): def add_template_model_config(parser: argparse.ArgumentParser): parser.add_argument("--template_model_id_or_path", type=str, default=None, help="Model ID of path of template models.") + parser.add_argument("--enable_lora_hot_loading", default=False, action="store_true", help="Whether to enable LoRA hot-loading. 
Only available for image-to-lora models.") return parser def add_general_config(parser: argparse.ArgumentParser): diff --git a/diffsynth/diffusion/template.py b/diffsynth/diffusion/template.py index 6b9a53fd9..9277dd7f5 100644 --- a/diffsynth/diffusion/template.py +++ b/diffsynth/diffusion/template.py @@ -2,6 +2,7 @@ from typing import Dict, List, Tuple, Union from ..core import ModelConfig, load_model from ..core.device.npu_compatible_device import get_device_type +from ..utils.lora.merge import merge_lora KVCache = Dict[str, Tuple[torch.Tensor, torch.Tensor]] @@ -74,9 +75,28 @@ def load_template_data_processor(path): class TemplatePipeline(torch.nn.Module): - def __init__(self, models: List[TemplateModel]): + def __init__( + self, + torch_dtype: torch.dtype = torch.bfloat16, + device: Union[str, torch.device] = get_device_type(), + model_configs: list[ModelConfig] = [], + lazy_loading: bool = False, + ): super().__init__() - self.models = torch.nn.ModuleList(models) + self.torch_dtype = torch_dtype + self.device = device + self.model_configs = model_configs + self.lazy_loading = lazy_loading + if lazy_loading: + self.models = None + else: + models = [] + for model_config in model_configs: + TemplatePipeline.check_vram_config(model_config) + model_config.download_if_necessary() + model = load_template_model(model_config.path, torch_dtype=torch_dtype, device=device) + models.append(model) + self.models = torch.nn.ModuleList(models) def merge_kv_cache(self, kv_cache_list: List[KVCache]) -> KVCache: names = {} @@ -100,6 +120,8 @@ def merge_template_cache(self, template_cache_list): data = [template_cache[param] for template_cache in template_cache_list if param in template_cache] if param == "kv_cache": data = self.merge_kv_cache(data) + elif param == "lora": + data = merge_lora(data) elif len(data) == 1: data = data[0] else: @@ -125,30 +147,32 @@ def from_pretrained( torch_dtype: torch.dtype = torch.bfloat16, device: Union[str, torch.device] = get_device_type(), 
model_configs: list[ModelConfig] = [], + lazy_loading: bool = False, ): - models = [] - for model_config in model_configs: - TemplatePipeline.check_vram_config(model_config) - model_config.download_if_necessary() - model = load_template_model(model_config.path, torch_dtype=torch_dtype, device=device) - models.append(model) - pipe = TemplatePipeline(models) + pipe = TemplatePipeline(torch_dtype, device, model_configs, lazy_loading) return pipe - @torch.no_grad() - def process_inputs(self, inputs: List[Dict], pipe=None, **kwargs): - return [(i.get("model_id", 0), self.models[i.get("model_id", 0)].process_inputs(pipe=pipe, **i)) for i in inputs] - - def forward(self, inputs: List[Tuple[int, Dict]], pipe=None, **kwargs): - template_cache = [] - for model_id, model_inputs in inputs: - kv_cache = self.models[model_id](pipe=pipe, **model_inputs) - template_cache.append(kv_cache) - return template_cache + def fetch_model(self, model_id): + if self.lazy_loading: + model_config = self.model_configs[model_id] + model_config.download_if_necessary() + model = load_template_model(model_config.path, torch_dtype=self.torch_dtype, device=self.device) + else: + model = self.models[model_id] + return model def call_single_side(self, pipe=None, inputs: List[Dict] = None): - inputs = self.process_inputs(pipe=pipe, inputs=inputs) - template_cache = self.forward(pipe=pipe, inputs=inputs) + model = None + onload_model_id = -1 + template_cache = [] + for i in inputs: + model_id = i.get("model_id", 0) + if model_id != onload_model_id: + model = self.fetch_model(model_id) + onload_model_id = model_id + cache = model.process_inputs(pipe=pipe, **i) + cache = model.forward(pipe=pipe, **cache) + template_cache.append(cache) template_cache = self.merge_template_cache(template_cache) return template_cache diff --git a/diffsynth/diffusion/training_module.py b/diffsynth/diffusion/training_module.py index e1d3852dd..844573f93 100644 --- a/diffsynth/diffusion/training_module.py +++ 
b/diffsynth/diffusion/training_module.py @@ -38,7 +38,7 @@ def __init__(self, data_processor): self.data_processor = data_processor def process(self, pipe, template_inputs): - if not hasattr(pipe, "template_model"): + if not hasattr(pipe, "template_model") or template_inputs is None: return {} if self.data_processor is not None: template_inputs = self.data_processor(**template_inputs) @@ -58,7 +58,7 @@ def __init__(self, use_gradient_checkpointing=False, use_gradient_checkpointing_ self.use_gradient_checkpointing_offload = use_gradient_checkpointing_offload def process(self, pipe, template_inputs): - if not hasattr(pipe, "template_model"): + if not hasattr(pipe, "template_model") or template_inputs is None: return {} template_cache = pipe.template_model.forward( **template_inputs, diff --git a/diffsynth/pipelines/flux2_image.py b/diffsynth/pipelines/flux2_image.py index 4bec2417f..a3c769426 100644 --- a/diffsynth/pipelines/flux2_image.py +++ b/diffsynth/pipelines/flux2_image.py @@ -100,6 +100,9 @@ def __call__( # LoRA lora = None, negative_lora = None, + # Text Embedding + extra_text_embedding = None, + negative_extra_text_embedding = None, # Inpaint inpaint_mask: Image.Image = None, inpaint_blur_size: int = None, @@ -113,10 +116,12 @@ def __call__( inputs_posi = { "prompt": prompt, "kv_cache": kv_cache, + "extra_text_embedding": extra_text_embedding, } inputs_nega = { "negative_prompt": negative_prompt, "kv_cache": negative_kv_cache, + "extra_text_embedding": negative_extra_text_embedding, } inputs_shared = { "cfg_scale": cfg_scale, "embedded_guidance": embedded_guidance, @@ -607,6 +612,7 @@ def model_fn_flux2( edit_latents=None, edit_image_ids=None, kv_cache=None, + extra_text_embedding=None, use_gradient_checkpointing=False, use_gradient_checkpointing_offload=False, **kwargs, @@ -617,6 +623,11 @@ def model_fn_flux2( latents = torch.concat([latents, edit_latents], dim=1) image_ids = torch.concat([image_ids, edit_image_ids], dim=1) embedded_guidance = 
torch.tensor([embedded_guidance], device=latents.device) + if extra_text_embedding is not None: + extra_text_ids = torch.zeros((1, extra_text_embedding.shape[1], 4), dtype=text_ids.dtype, device=text_ids.device) + extra_text_ids[:, :, -1] = torch.arange(prompt_embeds.shape[1], prompt_embeds.shape[1] + extra_text_embedding.shape[1]) + prompt_embeds = torch.concat([prompt_embeds, extra_text_embedding], dim=1) + text_ids = torch.concat([text_ids, extra_text_ids], dim=1) model_output = dit( hidden_states=latents, timestep=timestep / 1000, diff --git a/examples/flux2/model_training/train.py b/examples/flux2/model_training/train.py index 5378da4e2..144f41ea1 100644 --- a/examples/flux2/model_training/train.py +++ b/examples/flux2/model_training/train.py @@ -19,6 +19,7 @@ def __init__( fp8_models=None, offload_models=None, template_model_id_or_path=None, + enable_lora_hot_loading=False, device="cpu", task="sft", ): @@ -29,6 +30,7 @@ def __init__( self.pipe = Flux2ImagePipeline.from_pretrained(torch_dtype=torch.bfloat16, device=device, model_configs=model_configs, tokenizer_config=tokenizer_config) self.pipe = self.load_training_template_model(self.pipe, template_model_id_or_path, args.use_gradient_checkpointing, args.use_gradient_checkpointing_offload) self.pipe = self.split_pipeline_units(task, self.pipe, trainable_models, lora_base_model) + if enable_lora_hot_loading: self.pipe.dit = self.pipe.enable_lora_hot_loading(self.pipe.dit) # Training mode self.switch_pipe_to_training_mode( @@ -129,6 +131,7 @@ def flux2_parser(): fp8_models=args.fp8_models, offload_models=args.offload_models, template_model_id_or_path=args.template_model_id_or_path, + enable_lora_hot_loading=args.enable_lora_hot_loading, task=args.task, device="cpu" if args.initialize_model_on_cpu else accelerator.device, ) From f58ba5a784d6830f087c0f3099fb219a4abbe75d Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Thu, 16 Apr 2026 20:24:22 +0800 Subject: [PATCH 04/12] update docs --- README.md | 19 +- 
README_zh.md | 19 +- diffsynth/diffusion/template.py | 3 + .../Template_Model_Inference.md | 330 ++++++++++++++++++ .../Template_Model_Training.md | 297 ++++++++++++++++ .../Understanding_Diffusion_Templates.md | 62 ++++ docs/en/Model_Details/FLUX2.md | 9 + docs/en/README.md | 17 +- docs/en/index.rst | 8 + .../Template_Model_Inference.md | 330 ++++++++++++++++++ .../Template_Model_Training.md | 317 +++++++++++++++++ .../Understanding_Diffusion_Templates.md | 61 ++++ docs/zh/Model_Details/FLUX2.md | 9 + docs/zh/README.md | 17 +- docs/zh/index.rst | 8 + .../Template-KleinBase4B-Aesthetic.py | 52 +++ .../Template-KleinBase4B-Brightness.py | 43 +++ .../Template-KleinBase4B-ControlNet.py | 54 +++ .../Template-KleinBase4B-Edit.py | 54 +++ .../Template-KleinBase4B-Inpaint.py | 56 +++ .../Template-KleinBase4B-PandaMeme.py | 43 +++ .../Template-KleinBase4B-Sharpness.py | 35 ++ .../Template-KleinBase4B-SoftRGB.py | 52 +++ .../Template-KleinBase4B-Upscaler.py | 54 +++ .../model_inference/Template-KleinBase4B.py | 256 -------------- .../Template-KleinBase4B-Aesthetic.py | 63 ++++ .../Template-KleinBase4B-Brightness.py | 55 +++ .../Template-KleinBase4B-ControlNet.py | 66 ++++ .../Template-KleinBase4B-Edit.py | 66 ++++ .../Template-KleinBase4B-Inpaint.py | 68 ++++ .../Template-KleinBase4B-PandaMeme.py | 55 +++ .../Template-KleinBase4B-Sharpness.py | 47 +++ .../Template-KleinBase4B-SoftRGB.py | 64 ++++ .../Template-KleinBase4B-Upscaler.py | 66 ++++ .../full/Template-KleinBase4B-Aesthetic.sh | 19 + .../full/Template-KleinBase4B-Brightness.sh | 18 + .../full/Template-KleinBase4B-ControlNet.sh | 18 + ...Base4B.sh => Template-KleinBase4B-Edit.sh} | 15 +- .../full/Template-KleinBase4B-Inpaint.sh | 18 + .../full/Template-KleinBase4B-PandaMeme.sh | 18 + .../full/Template-KleinBase4B-Sharpness.sh | 18 + .../full/Template-KleinBase4B-SoftRGB.sh | 18 + .../full/Template-KleinBase4B-Upscaler.sh | 18 + .../scripts/brightness/model.py | 62 ++++ ...> convert_base_model_to_template_model.py} | 
0 .../FLUX.2-klein-base-4B_lora.sh | 34 ++ .../Template-KleinBase4B-Brightness.sh | 36 ++ .../Template-KleinBase4B-Aesthetic.py | 55 +++ .../Template-KleinBase4B-Brightness.py | 46 +++ .../Template-KleinBase4B-ControlNet.py | 57 +++ .../Template-KleinBase4B-Edit.py | 57 +++ .../Template-KleinBase4B-Inpaint.py | 59 ++++ .../Template-KleinBase4B-PandaMeme.py | 46 +++ .../Template-KleinBase4B-Sharpness.py | 38 ++ .../Template-KleinBase4B-SoftRGB.py | 55 +++ .../Template-KleinBase4B-Upscaler.py | 57 +++ 56 files changed, 3237 insertions(+), 280 deletions(-) create mode 100644 docs/en/Diffusion_Templates/Template_Model_Inference.md create mode 100644 docs/en/Diffusion_Templates/Template_Model_Training.md create mode 100644 docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md create mode 100644 docs/zh/Diffusion_Templates/Template_Model_Inference.md create mode 100644 docs/zh/Diffusion_Templates/Template_Model_Training.md create mode 100644 docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-Brightness.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-Edit.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py delete mode 100644 examples/flux2/model_inference/Template-KleinBase4B.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py create mode 100644 
examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh rename examples/flux2/model_training/full/{Template-KleinBase4B.sh => Template-KleinBase4B-Edit.sh} (52%) create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh create mode 100644 examples/flux2/model_training/scripts/brightness/model.py rename examples/flux2/model_training/scripts/{convert_base_model_to_skill_model.py => convert_base_model_to_template_model.py} (100%) create mode 100644 examples/flux2/model_training/special/split_training/FLUX.2-klein-base-4B_lora.sh create mode 100644 examples/flux2/model_training/special/split_training/Template-KleinBase4B-Brightness.sh create mode 100644 
examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py diff --git a/README.md b/README.md index b9f8ab02e..fb905d2b9 100644 --- a/README.md +++ b/README.md @@ -343,11 +343,20 @@ Example code for FLUX.2 is available at: [/examples/flux2/](/examples/flux2/) | Model ID | Inference | Low-VRAM Inference | Full Training | Full Training Validation | LoRA Training | LoRA Training Validation | |-|-|-|-|-|-|-| -|[black-forest-labs/FLUX.2-dev](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-dev)|[code](/examples/flux2/model_inference/FLUX.2-dev.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-dev.py)|-|-|[code](/examples/flux2/model_training/lora/FLUX.2-dev.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-dev.py)| 
-|[black-forest-labs/FLUX.2-klein-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-4B)|[code](/examples/flux2/model_inference/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-4B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-4B.py)| -|[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| -|[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| 
-|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| +|[black-forest-labs/FLUX.2-dev](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-dev)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-dev.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-dev.py)|-|-|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-dev.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-dev.py)| +|[black-forest-labs/FLUX.2-klein-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-4B.py)| 
+|[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| +|[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| 
+|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| +|[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| diff --git a/README_zh.md b/README_zh.md index 76b29d873..5d5a4f7ce 100644 --- a/README_zh.md +++ b/README_zh.md @@ -343,11 +343,20 @@ FLUX.2 的示例代码位于:[/examples/flux2/](/examples/flux2/) |模型 ID|推理|低显存推理|全量训练|全量训练后验证|LoRA 训练|LoRA 训练后验证| |-|-|-|-|-|-|-| -|[black-forest-labs/FLUX.2-dev](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-dev)|[code](/examples/flux2/model_inference/FLUX.2-dev.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-dev.py)|-|-|[code](/examples/flux2/model_training/lora/FLUX.2-dev.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-dev.py)| 
-|[black-forest-labs/FLUX.2-klein-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-4B)|[code](/examples/flux2/model_inference/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-4B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-4B.py)| -|[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| -|[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| 
-|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| +|[black-forest-labs/FLUX.2-dev](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-dev)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-dev.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-dev.py)|-|-|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-dev.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-dev.py)| +|[black-forest-labs/FLUX.2-klein-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-4B.py)| 
+|[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| +|[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| 
+|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| +|[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| diff --git a/diffsynth/diffusion/template.py b/diffsynth/diffusion/template.py index 9277dd7f5..c685ad6d8 100644 --- a/diffsynth/diffusion/template.py +++ b/diffsynth/diffusion/template.py @@ -88,6 +88,9 @@ def __init__( self.model_configs = model_configs self.lazy_loading = lazy_loading if lazy_loading: + for model_config in model_configs: + TemplatePipeline.check_vram_config(model_config) + model_config.download_if_necessary() self.models = None else: models = [] diff --git a/docs/en/Diffusion_Templates/Template_Model_Inference.md b/docs/en/Diffusion_Templates/Template_Model_Inference.md new file mode 100644 index 000000000..8e1a0b022 
--- /dev/null +++ b/docs/en/Diffusion_Templates/Template_Model_Inference.md @@ -0,0 +1,330 @@ +# Template Model Inference + +## Enabling Template Models on Base Model Pipelines + +Using the base model [black-forest-labs/FLUX.2-klein-base-4B](https://modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B) as an example, when generating images using only the base model: + +```python +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +# Load base model +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +# Generate an image +image = pipe( + prompt="a cat", + seed=0, cfg_scale=4, + height=1024, width=1024, +) +image.save("image.png") +``` + +The Template model [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness) can control image brightness during generation. Through the `TemplatePipeline` model, it can be loaded from ModelScope (via `ModelConfig(model_id="xxx/xxx")`) or from a local path (via `ModelConfig(path="xxx")`). Inputting `scale=0.8` increases image brightness. Note that in the code, input parameters for `pipe` must be transferred to `template_pipeline`, and `template_inputs` should be added. 
+ +```python +# Load Template model +template_pipeline = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Template-Brightness") + ], +) +# Generate an image +image = template_pipeline( + pipe, + prompt="a cat", + seed=0, cfg_scale=4, + height=1024, width=1024, + template_inputs=[{"scale": 0.8}], +) +image.save("image_0.8.png") +``` + +## CFG Enhancement for Template Models + +Template models can enable CFG (Classifier-Free Guidance) to make control effects more pronounced. For example, with the model [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness), adding `negative_template_inputs` to the TemplatePipeline input parameters and setting its scale to 0.5 will generate images with more noticeable brightness variations by contrasting both sides. + +```python +# Generate an image with CFG +image = template_pipeline( + pipe, + prompt="a cat", + seed=0, cfg_scale=4, + height=1024, width=1024, + template_inputs=[{"scale": 0.8}], + negative_template_inputs=[{"scale": 0.5}], +) +image.save("image_0.8_cfg.png") +``` + +## Low VRAM Support + +Template models currently do not support the main framework's VRAM management, but lazy loading can be used - loading Template models only when needed for inference. This significantly reduces VRAM requirements when enabling multiple Template models, with peak VRAM usage being that of a single Template model. Add parameter `lazy_loading=True` to enable. + +```python +template_pipeline = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Template-Brightness") + ], + lazy_loading=True, +) +``` + +The base model's Pipeline and Template Pipeline are completely independent and can enable VRAM management on demand. 
+
+When Template model outputs contain LoRA in Template Cache, you need to enable VRAM management for the base model's Pipeline or enable LoRA hot loading (using the code below), otherwise LoRA weights will be stacked (accumulated) on top of each other across runs.
+
+```python
+pipe.dit = pipe.enable_lora_hot_loading(pipe.dit)
+```
+
+## Enabling Multiple Template Models
+
+`TemplatePipeline` can load multiple Template models. During inference, use `model_id` in `template_inputs` to distinguish inputs for each Template model.
+
+After enabling VRAM management for the base model's Pipeline and lazy loading for Template Pipeline, you can load any number of Template models.
+
+```python
+from diffsynth.diffusion.template import TemplatePipeline
+from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
+import torch
+from PIL import Image
+
+vram_config = {
+    "offload_dtype": "disk",
+    "offload_device": "disk",
+    "onload_dtype": torch.bfloat16,
+    "onload_device": "cuda",
+    "preparing_dtype": torch.bfloat16,
+    "preparing_device": "cuda",
+    "computation_dtype": torch.bfloat16,
+    "computation_device": "cuda",
+}
+pipe = Flux2ImagePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
+    ],
+    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
+)
+pipe.dit = pipe.enable_lora_hot_loading(pipe.dit)
+template = TemplatePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    lazy_loading=True,
+    model_configs=[
+        ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness"),
+        
ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ControlNet"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Edit"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Upscaler"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-SoftRGB"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme"), + ], +) +``` + +### Super-Resolution + Sharpness Enhancement + +Combining [DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler) and [DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness) can upscale blurry images while improving detail clarity. + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 3, + "image": Image.open("data/assets/image_lowres_100.jpg"), + "prompt": "A cat is sitting on a stone.", + }, + { + "model_id": 5, + "scale": 1, + }, + ], + negative_template_inputs = [ + { + "model_id": 3, + "image": Image.open("data/assets/image_lowres_100.jpg"), + "prompt": "", + }, + { + "model_id": 5, + "scale": 0, + }, + ], +) +image.save("image_Upscaler_Sharpness.png") +``` + +| Low Resolution Input | High Resolution Output | +|----------------------|------------------------| +| ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_lowres_100.jpg) | ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Upscaler_Sharpness.png) | + +### Structure Control + Aesthetic Alignment + Sharpness Enhancement + 
+[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) controls composition, [DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic) fills in details, and [DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness) ensures clarity. Combining these three Template models produces exquisite images. + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone, bathed in bright sunshine.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 1, + "image": Image.open("data/assets/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, bathed in bright sunshine.", + }, + { + "model_id": 7, + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.0, + "merge_type": "mean", + }, + { + "model_id": 5, + "scale": 0.8, + }, + ], + negative_template_inputs = [ + { + "model_id": 1, + "image": Image.open("data/assets/image_depth.jpg"), + "prompt": "", + }, + { + "model_id": 7, + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.0, + "merge_type": "mean", + }, + { + "model_id": 5, + "scale": 0, + }, + ], +) +image.save("image_Controlnet_Aesthetic_Sharpness.png") +``` + +| Structure Control Image | Output Image | +|-------------------------|--------------| +| ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg) | ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Aesthetic_Sharpness.png) | + +### Structure Control + Image Editing + Color Adjustment + +[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) controls composition, 
[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit) preserves original image details like fur texture, and [DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB) controls color tones, creating an artistic masterpiece. + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone. Colored ink painting.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 1, + "image": Image.open("data/assets/image_depth.jpg"), + "prompt": "A cat is sitting on a stone. Colored ink painting.", + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "Convert the image style to colored ink painting.", + }, + { + "model_id": 4, + "R": 0.9, + "G": 0.5, + "B": 0.3, + }, + ], + negative_template_inputs = [ + { + "model_id": 1, + "image": Image.open("data/assets/image_depth.jpg"), + "prompt": "", + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "", + }, + ], +) +image.save("image_Controlnet_Edit_SoftRGB.png") +``` + +| Structure Control Image | Editing Input Image | Output Image | +|-------------------------|---------------------|--------------| +| ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg) | ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg) | ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Edit_SoftRGB.png) | + +### Brightness Control + Image Editing + Local Redrawing + +[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness) generates bright scenes, 
[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit) references original image layout, and [DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint) keeps background unchanged, generating cross-dimensional content. + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone. Flat anime style.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 0, + "scale": 0.6, + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "Convert the image style to flat anime style.", + }, + { + "model_id": 6, + "image": Image.open("data/assets/image_reference.jpg"), + "mask": Image.open("data/assets/image_mask_1.jpg"), + "force_inpaint": True, + }, + ], + negative_template_inputs = [ + { + "model_id": 0, + "scale": 0.5, + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "", + }, + { + "model_id": 6, + "image": Image.open("data/assets/image_reference.jpg"), + "mask": Image.open("data/assets/image_mask_1.jpg"), + }, + ], +) +image.save("image_Brightness_Edit_Inpaint.png") +``` + +| Reference Image | Redrawing Area | Output Image | +|------------------|----------------|--------------| +| ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg) | ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_mask_1.jpg) | ![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Brightness_Edit_Inpaint.png) | \ No newline at end of file diff --git a/docs/en/Diffusion_Templates/Template_Model_Training.md b/docs/en/Diffusion_Templates/Template_Model_Training.md new file mode 100644 index 000000000..d9f1d230f --- /dev/null +++ 
b/docs/en/Diffusion_Templates/Template_Model_Training.md @@ -0,0 +1,297 @@ +# Template Model Training + +DiffSynth-Studio currently provides comprehensive Template training support for [black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B), with more model adaptations coming soon. + +## Continuing Training from Pretrained Models + +To continue training from our pretrained models, refer to the table in [FLUX.2](../Model_Details/FLUX2.md#model-overview) to find the corresponding training script. + +## Building New Template Models + +### Template Model Component Format + +A Template model binds to a model repository (or local folder) containing a code file `model.py` as the entry point. Here's the template for `model.py`: + +```python +import torch + +class CustomizedTemplateModel(torch.nn.Module): + def __init__(self): + super().__init__() + + @torch.no_grad() + def process_inputs(self, xxx, **kwargs): + yyy = xxx + return {"yyy": yyy} + + def forward(self, yyy, **kwargs): + zzz = yyy + return {"zzz": zzz} + +class DataProcessor: + def __call__(self, www, **kwargs): + xxx = www + return {"xxx": xxx} + +TEMPLATE_MODEL = CustomizedTemplateModel +TEMPLATE_MODEL_PATH = "model.safetensors" +TEMPLATE_DATA_PROCESSOR = DataProcessor +``` + +During Template model inference, Template Input passes through `TEMPLATE_MODEL`'s `process_inputs` and `forward` to generate Template Cache. + +```mermaid +flowchart LR; + i@{shape: text, label: "Template Input"}-->p[process_inputs]; + subgraph TEMPLATE_MODEL + p[process_inputs]-->f[forward] + end + f[forward]-->c@{shape: text, label: "Template Cache"}; +``` + +During Template model training, Template Input comes from the dataset through `TEMPLATE_DATA_PROCESSOR`. 
+ +```mermaid +flowchart LR; + d@{shape: text, label: "Dataset"}-->dp[TEMPLATE_DATA_PROCESSOR]-->p[process_inputs]; + subgraph TEMPLATE_MODEL + p[process_inputs]-->f[forward] + end + f[forward]-->c@{shape: text, label: "Template Cache"}; +``` + +#### `TEMPLATE_MODEL` + +`TEMPLATE_MODEL` implements the Template model logic, inheriting from `torch.nn.Module` with required `process_inputs` and `forward` methods. These two methods form the complete Template model inference process, split into two stages to better support [two-stage split training](https://diffsynth-studio-doc.readthedocs.io/en/latest/Training/Split_Training.html). + +* `process_inputs` must use `@torch.no_grad()` for gradient-free computation +* `forward` must contain all gradient computations required for training + +Both methods should accept `**kwargs` for compatibility. Reserved parameters include: + +* To interact with the base model Pipeline (e.g., call text encoder), add `pipe` parameter to method inputs +* To enable Gradient Checkpointing, add `use_gradient_checkpointing` and `use_gradient_checkpointing_offload` to `forward` inputs +* Multiple Template models use `model_id` to distinguish Template Inputs - do not use this field in method parameters + +#### `TEMPLATE_MODEL_PATH` (Optional) + +`TEMPLATE_MODEL_PATH` specifies the relative path to pretrained weights. For example: + +```python +TEMPLATE_MODEL_PATH = "model.safetensors" +``` + +For multi-file models: + +```python +TEMPLATE_MODEL_PATH = [ + "model-00001-of-00003.safetensors", + "model-00002-of-00003.safetensors", + "model-00003-of-00003.safetensors", +] +``` + +Set to `None` for random initialization: + +```python +TEMPLATE_MODEL_PATH = None +``` + +#### `TEMPLATE_DATA_PROCESSOR` (Optional) + +To train Template models with DiffSynth-Studio, datasets should contain `template_inputs` fields in `metadata.json`. These fields pass through `TEMPLATE_DATA_PROCESSOR` to generate inputs for Template model methods. 
+ +For example, the brightness control model [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness) takes `scale` as input: + +```json +[ + { + "image": "images/image_1.jpg", + "prompt": "a cat", + "template_inputs": {"scale": 0.2} + }, + { + "image": "images/image_2.jpg", + "prompt": "a dog", + "template_inputs": {"scale": 0.6} + } +] +``` + +```python +class DataProcessor: + def __call__(self, scale, **kwargs): + return {"scale": scale} + +TEMPLATE_DATA_PROCESSOR = DataProcessor +``` + +Or calculate scale from image paths: + +```json +[ + { + "image": "images/image_1.jpg", + "prompt": "a cat", + "template_inputs": {"image": "/path/to/your/dataset/images/image_1.jpg"} + } +] +``` + +```python +class DataProcessor: + def __call__(self, image, **kwargs): + image = Image.open(image) + image = np.array(image) + return {"scale": image.astype(np.float32).mean() / 255} + +TEMPLATE_DATA_PROCESSOR = DataProcessor +``` + +### Training Template Models + +A Template model is "trainable" if its Template Cache variables are fully decoupled from the base model Pipeline - these variables should reach `model_fn` without participating in any Pipeline Unit calculations. + +For training with [black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B), use these training script parameters: + +* `--extra_inputs`: Additional inputs. 
Use `template_inputs` for text-to-image models, `edit_image,template_inputs` for image editing models +* `--template_model_id_or_path`: Template model ID or local path (use `:` suffix for ModelScope IDs, e.g., `"DiffSynth-Studio/Template-KleinBase4B-Brightness:"`) +* `--remove_prefix_in_ckpt`: State dict prefix to remove when saving models (use `"pipe.template_model."`) +* `--trainable_models`: Trainable components (use `"template_model"` for full model, or `"template_model.xxx,template_model.yyy"` for specific components) + +Example training script: + +```shell +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "examples/flux2/model_training/scripts/brightness" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --output_path "./models/train/Template-KleinBase4B-Brightness_example" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters +``` + +### Interacting with Base Model Pipeline Components + +Template models can interact with base model Pipelines. 
For example, using the text encoder: + +```python +class CustomizedTemplateModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.xxx = xxx() + + @torch.no_grad() + def process_inputs(self, text, pipe, **kwargs): + input_ids = pipe.tokenizer(text) + text_emb = pipe.text_encoder(input_ids) + return {"text_emb": text_emb} + + def forward(self, text_emb, pipe, **kwargs): + kv_cache = self.xxx(text_emb) + return {"kv_cache": kv_cache} + +TEMPLATE_MODEL = CustomizedTemplateModel +``` + +### Using Non-Trainable Components + +For models with pretrained components: + +```python +class CustomizedTemplateModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.image_encoder = XXXEncoder.from_pretrained(xxx) + self.mlp = MLP() + + @torch.no_grad() + def process_inputs(self, image, **kwargs): + emb = self.image_encoder(image) + return {"emb": emb} + + def forward(self, emb, **kwargs): + kv_cache = self.mlp(emb) + return {"kv_cache": kv_cache} + +TEMPLATE_MODEL = CustomizedTemplateModel +``` + +Set `--trainable_models template_model.mlp` to train only the MLP component. + +### Uploading Template Models + +After training, follow these steps to upload to ModelScope: + +1. Set model path in `model.py`: +```python +TEMPLATE_MODEL_PATH = "model.safetensors" +``` + +2. Upload using ModelScope CLI: +```shell +modelscope upload user_name/your_model_id /path/to/your/model.py model.py --token ms-xxx +``` + +3. Package model files: +```python +from diffsynth.diffusion.template import load_template_model, load_state_dict +from safetensors.torch import save_file +import torch + +model = load_template_model("path/to/your/template/model", torch_dtype=torch.bfloat16, device="cpu") +state_dict = load_state_dict("path/to/your/ckpt/epoch-1.safetensors", torch_dtype=torch.bfloat16, device="cpu") +state_dict.update(model.state_dict()) +save_file(state_dict, "model.safetensors") +``` + +4. 
Upload model file: +```shell +modelscope upload user_name/your_model_id /path/to/your/model/epoch-1.safetensors model.safetensors --token ms-xxx +``` + +5. Verify inference: +```python +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +# Load base model +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) + +# Load Template model +template_pipeline = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="user_name/your_model_id") + ], +) + +# Generate image +image = template_pipeline( + pipe, + prompt="a cat", + seed=0, cfg_scale=4, + height=1024, width=1024, + template_inputs=[{xxx}], +) +image.save("image.png") \ No newline at end of file diff --git a/docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md b/docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md new file mode 100644 index 000000000..1da52a8fd --- /dev/null +++ b/docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md @@ -0,0 +1,62 @@ +# Understanding Diffusion Templates + +The Diffusion Templates framework is a controllable generation plugin framework in DiffSynth-Studio that provides additional controllable generation capabilities for Diffusion models. 
+ +## Framework Structure + +The Diffusion Templates framework structure is shown below: + +```mermaid +flowchart TD; + subgraph Template Pipeline + si@{shape: text, label: "Template Input"}-->i1@{shape: text, label: "Template Input 1"}; + si@{shape: text, label: "Template Input"}-->i2@{shape: text, label: "Template Input 2"}; + si@{shape: text, label: "Template Input"}-->i3@{shape: text, label: "Template Input 3"}; + i1@{shape: text, label: "Template Input 1"}-->m1[Template Model 1]-->c1@{shape: text, label: "Template Cache 1"}; + i2@{shape: text, label: "Template Input 2"}-->m2[Template Model 2]-->c2@{shape: text, label: "Template Cache 2"}; + i3@{shape: text, label: "Template Input 3"}-->m3[Template Model 3]-->c3@{shape: text, label: "Template Cache 3"}; + c1-->c@{shape: text, label: "Template Cache"}; + c2-->c; + c3-->c; + end + i@{shape: text, label: "Model Input"}-->m[Diffusion Pipeline]-->o@{shape: text, label: "Model Output"}; + c-->m; +``` + +The framework contains these module designs: + +* **Template Input**: Template model input. Format: Python dictionary with fields determined by each Template model (e.g., `{"scale": 0.8}`) +* **Template Model**: Template model, loadable from ModelScope (`ModelConfig(model_id="xxx/xxx")`) or local path (`ModelConfig(path="xxx")`) +* **Template Cache**: Template model output. Format: Python dictionary with fields matching base model Pipeline input parameters +* **Template Pipeline**: Module for managing multiple Template models. Handles model loading and cache integration + +When the Diffusion Templates framework is disabled, base model components (Text Encoder, DiT, VAE) are loaded into the Diffusion Pipeline. Model Input (prompt, height, width) produces Model Output (e.g., images). + +When enabled, Template models are loaded into the Template Pipeline. The Template Pipeline outputs Template Cache (a subset of Diffusion Pipeline input parameters) for subsequent processing in the Diffusion Pipeline. 
This enables controllable generation by intercepting part of the Diffusion Pipeline's input parameters.
+
+## Model Capability Medium
+
+Template Cache is defined as a subset of Diffusion Pipeline input parameters, ensuring framework generality. We restrict Template model outputs (the Template Cache) to only be Diffusion Pipeline parameters. The KV-Cache is particularly suitable as a Diffusion medium:
+
+* Proven effective in LLM Skills (prompts are converted to KV-Cache)
+* Has "high permission" in Diffusion models - can directly control image generation
+* Supports sequence-level concatenation for multiple Template models
+* Requires minimal development (add a pipeline parameter and integrate it into the model)
+
+Other potential Template mediums:
+* **Residual**: Used in ControlNet for point-to-point control, but has resolution limitations and potential conflicts when merging
+* **LoRA**: Treated as input parameters rather than model components
+
+**Currently, we only support KV-Cache and LoRA as Template Cache mediums in FLUX.2 Pipeline, with plans to support more models and mediums in the future.**
+
+## Template Model Format
+
+A Template model has this structure:
+
+```
+Template_Model
+├── model.py
+└── model.safetensors
+```
+
+Where `model.py` is the entry point and `model.safetensors` contains model weights. For implementation details, see [Template Model Training](Template_Model_Training.md) or [existing Template models](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness).
\ No newline at end of file diff --git a/docs/en/Model_Details/FLUX2.md b/docs/en/Model_Details/FLUX2.md index f3bb020af..60128798b 100644 --- a/docs/en/Model_Details/FLUX2.md +++ b/docs/en/Model_Details/FLUX2.md @@ -66,6 +66,15 @@ image.save("image.jpg") |[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| |[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| 
|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| +|[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| Special Training Scripts: diff --git a/docs/en/README.md b/docs/en/README.md index d7e2893ed..e36d21a31 100644 --- a/docs/en/README.md +++ b/docs/en/README.md @@ -18,6 +18,9 @@ graph LR; I_want_to_explore_new_technologies_based_on_this_project-->sec5[Section 5: API Reference]; I_want_to_explore_new_technologies_based_on_this_project-->sec6[Section 6: Academic Guide]; I_encountered_a_problem-->sec7[Section 7: Frequently Asked Questions]; + I_want_to_explore_new_technologies_based_on_this_project-->sec6[Section 6: Diffusion Templates] + I_want_to_explore_new_technologies_based_on_this_project-->sec8[Section 8: Academic Guide]; + 
I_encountered_a_problem-->sec9[Section 9: Frequently Asked Questions]; ``` @@ -75,7 +78,15 @@ This section introduces the independent core module `diffsynth.core` in `DiffSyn * [`diffsynth.core.loader`](./API_Reference/core/loader.md): Model download and loading * [`diffsynth.core.vram`](./API_Reference/core/vram.md): VRAM management -## Section 6: Academic Guide +## Section 6: Diffusion Templates + +This section introduces the controllable generation plugin framework for Diffusion models, explaining the framework's operation mechanism and how to use Template models for inference and training. + +* [Understanding Diffusion Templates](./Diffusion_Templates/Understanding_Diffusion_Templates.md) +* [Template Model Inference](./Diffusion_Templates/Template_Model_Inference.md) +* [Template Model Training](./Diffusion_Templates/Template_Model_Training.md) + +## Section 7: Academic Guide This section introduces how to use `DiffSynth-Studio` to train new models, helping researchers explore new model technologies. @@ -84,8 +95,8 @@ This section introduces how to use `DiffSynth-Studio` to train new models, helpi * Designing controllable generation models 【coming soon】 * Creating new training paradigms 【coming soon】 -## Section 7: Frequently Asked Questions +## Section 8: Frequently Asked Questions This section summarizes common developer questions. If you encounter issues during usage or development, please refer to this section. If you still cannot resolve the problem, please submit an issue on GitHub. -* [Frequently Asked Questions](./QA.md) \ No newline at end of file +* [Frequently Asked Questions](./QA.md) diff --git a/docs/en/index.rst b/docs/en/index.rst index 4b933cac2..34c00b687 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -60,6 +60,14 @@ Welcome to DiffSynth-Studio's Documentation API_Reference/core/loader API_Reference/core/vram +.. 
toctree:: + :maxdepth: 2 + :caption: Diffusion Templates + + Diffusion_Templates/Understanding_Diffusion_Templates.md + Diffusion_Templates/Template_Model_Inference.md + Diffusion_Templates/Template_Model_Training.md + .. toctree:: :maxdepth: 2 :caption: Research Guide diff --git a/docs/zh/Diffusion_Templates/Template_Model_Inference.md b/docs/zh/Diffusion_Templates/Template_Model_Inference.md new file mode 100644 index 000000000..8fdd8e648 --- /dev/null +++ b/docs/zh/Diffusion_Templates/Template_Model_Inference.md @@ -0,0 +1,330 @@ +# Template 模型推理 + +## 在基础模型 Pipeline 上启用 Template 模型 + +我们以基础模型 [black-forest-labs/FLUX.2-klein-base-4B](https://modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B) 为例,当仅使用基础模型生成图像时 + +```python +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +# Load base model +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +# Generate an image +image = pipe( + prompt="a cat", + seed=0, cfg_scale=4, + height=1024, width=1024, +) +image.save("image.png") +``` + +Template 模型 [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness) 可以控制模型生成图像的亮度。通过 `TemplatePipeline` 模型,可从魔搭模型库加载(`ModelConfig(model_id="xxx/xxx")`)或从本地路径加载(`ModelConfig(path="xxx")`)。输入 scale=0.8 提高图像的亮度。注意在代码中,需将 `pipe` 的输入参数转移到 `template_pipeline` 中,并添加 `template_inputs`。 + +```python +# Load 
Template model
+template_pipeline = TemplatePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[
+        ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Template-Brightness")
+    ],
+)
+# Generate an image
+image = template_pipeline(
+    pipe,
+    prompt="a cat",
+    seed=0, cfg_scale=4,
+    height=1024, width=1024,
+    template_inputs=[{"scale": 0.8}],
+)
+image.save("image_0.8.png")
+```
+
+## Template 模型的 CFG 增强
+
+Template 模型可以开启 CFG(Classifier-Free Guidance),使其控制效果更明显。例如模型 [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness),在 `TemplatePipeline` 的输入参数中添加 `negative_template_inputs` 并将其 scale 设置为 0.5,模型就会对比两侧的差异,生成亮度变化更明显的图像。
+
+```python
+# Generate an image with CFG
+image = template_pipeline(
+    pipe,
+    prompt="a cat",
+    seed=0, cfg_scale=4,
+    height=1024, width=1024,
+    template_inputs=[{"scale": 0.8}],
+    negative_template_inputs=[{"scale": 0.5}],
+)
+image.save("image_0.8_cfg.png")
+```
+
+## 低显存支持
+
+Template 模型暂不支持主框架的显存管理,但可以使用惰性加载,仅在需要推理时加载对应的 Template 模型,这在启用多个 Template 模型时可以显著降低显存需求,显存占用峰值为单个 Template 模型的显存占用量。添加参数 `lazy_loading=True` 即可。
+
+```python
+template_pipeline = TemplatePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[
+        ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Template-Brightness")
+    ],
+    lazy_loading=True,
+)
+```
+
+基础模型的 Pipeline 与 Template Pipeline 完全独立,可按需开启显存管理。
+
+当 Template 模型输出的 Template Cache 包含 LoRA 时,需对基础模型的 Pipeline 开启显存管理或开启 LoRA 热加载(使用以下代码),否则会导致 LoRA 权重叠加。
+
+```python
+pipe.dit = pipe.enable_lora_hot_loading(pipe.dit)
+```
+
+## 启用多个 Template 模型
+
+`TemplatePipeline` 可以加载多个 Template 模型,推理时在 `template_inputs` 中使用 `model_id` 区分每个 Template 模型的输入。
+
+对基础模型 Pipeline 开启显存管理,对 Template Pipeline 开启惰性加载后,你可以加载任意多个 Template 模型。
+
+```python
+from diffsynth.diffusion.template import TemplatePipeline
+from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
+import torch
+from PIL import Image
+ +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.bfloat16, + "onload_device": "cuda", + "preparing_dtype": torch.bfloat16, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + lazy_loading=True, + model_configs=[ + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ControlNet"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Edit"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Upscaler"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-SoftRGB"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme"), + ], +) +``` + +### 超分辨率 + 锐利激发 + +组合 [DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler) 和 
[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness),可以将模糊图片高清化,同时提高细节部分的清晰度。 + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 3, + "image": Image.open("data/assets/image_lowres_100.jpg"), + "prompt": "A cat is sitting on a stone.", + }, + { + "model_id": 5, + "scale": 1, + }, + ], + negative_template_inputs = [ + { + "model_id": 3, + "image": Image.open("data/assets/image_lowres_100.jpg"), + "prompt": "", + }, + { + "model_id": 5, + "scale": 0, + }, + ], +) +image.save("image_Upscaler_Sharpness.png") +``` + +|低清晰度输入|高清晰度输出| +|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_lowres_100.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Upscaler_Sharpness.png)| + +### 结构控制 + 美学对齐 + 锐利激发 + +[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) 负责控制构图,[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic) 负责填充细节,[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness) 负责保证清晰度,融合三个 Template 模型可以获得精美的画面。 + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone, bathed in bright sunshine.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 1, + "image": Image.open("data/assets/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, bathed in bright sunshine.", + }, + { + "model_id": 7, + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.0, + "merge_type": "mean", + }, + { + "model_id": 5, + "scale": 0.8, + }, + ], + negative_template_inputs = [ + { + "model_id": 1, + 
"image": Image.open("data/assets/image_depth.jpg"), + "prompt": "", + }, + { + "model_id": 7, + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.0, + "merge_type": "mean", + }, + { + "model_id": 5, + "scale": 0, + }, + ], +) +image.save("image_Controlnet_Aesthetic_Sharpness.png") +``` + +|结构控制图|输出图| +|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Aesthetic_Sharpness.png)| + +### 结构控制 + 图像编辑 + 色彩调节 + +[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) 负责控制构图,[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit) 负责保留原图的毛发纹理等细节,[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB) 负责控制画面色调,一副极具艺术感的画作被渲染出来。 + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone. Colored ink painting.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 1, + "image": Image.open("data/assets/image_depth.jpg"), + "prompt": "A cat is sitting on a stone. 
Colored ink painting.", + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "Convert the image style to colored ink painting.", + }, + { + "model_id": 4, + "R": 0.9, + "G": 0.5, + "B": 0.3, + }, + ], + negative_template_inputs = [ + { + "model_id": 1, + "image": Image.open("data/assets/image_depth.jpg"), + "prompt": "", + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "", + }, + ], +) +image.save("image_Controlnet_Edit_SoftRGB.png") +``` + +|结构控制图|编辑输入图|输出图| +|-|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Edit_SoftRGB.png)| + +### 亮度控制 + 图像编辑 + 局部重绘 + +[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness) 负责生成明亮的画面,[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit) 负责参考原图布局,[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint) 负责控制背景不变,生成跨越二次元的画面内容。 + +```python +image = template( + pipe, + prompt="A cat is sitting on a stone. 
Flat anime style.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [ + { + "model_id": 0, + "scale": 0.6, + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "Convert the image style to flat anime style.", + }, + { + "model_id": 6, + "image": Image.open("data/assets/image_reference.jpg"), + "mask": Image.open("data/assets/image_mask_1.jpg"), + "force_inpaint": True, + }, + ], + negative_template_inputs = [ + { + "model_id": 0, + "scale": 0.5, + }, + { + "model_id": 2, + "image": Image.open("data/assets/image_reference.jpg"), + "prompt": "", + }, + { + "model_id": 6, + "image": Image.open("data/assets/image_reference.jpg"), + "mask": Image.open("data/assets/image_mask_1.jpg"), + }, + ], +) +image.save("image_Brightness_Edit_Inpaint.png") +``` + +|参考图|重绘区域|输出图| +|-|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_mask_1.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Brightness_Edit_Inpaint.png)| diff --git a/docs/zh/Diffusion_Templates/Template_Model_Training.md b/docs/zh/Diffusion_Templates/Template_Model_Training.md new file mode 100644 index 000000000..a45180db5 --- /dev/null +++ b/docs/zh/Diffusion_Templates/Template_Model_Training.md @@ -0,0 +1,317 @@ +# Template 模型训练 + +DiffSynth-Studio 目前已为 [black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B) 提供了全面的 Templates 训练支持,更多模型的适配敬请期待。 + +## 基于预训练 Template 模型继续训练 + +如需基于我们预训练好的模型进行继续训练,请参考[FLUX.2](../Model_Details/FLUX2.md#模型总览) 中的表格,找到对应的训练脚本。 + +## 构建新的 Template 模型 + +### Template 模型组件格式 + +一个 Template 模型与一个模型库(或一个本地文件夹)绑定,模型库中有代码文件 `model.py` 作为唯一入口。`model.py` 的模板如下: + +```python +import torch + +class CustomizedTemplateModel(torch.nn.Module): + def 
__init__(self): + super().__init__() + + @torch.no_grad() + def process_inputs(self, xxx, **kwargs): + yyy = xxx + return {"yyy": yyy} + + def forward(self, yyy, **kwargs): + zzz = yyy + return {"zzz": zzz} + +class DataProcessor: + def __call__(self, www, **kwargs): + xxx = www + return {"xxx": xxx} + +TEMPLATE_MODEL = CustomizedTemplateModel +TEMPLATE_MODEL_PATH = "model.safetensors" +TEMPLATE_DATA_PROCESSOR = DataProcessor +``` + +在 Template 模型推理时,Template Input 先后经过 `TEMPLATE_MODEL` 的 `process_inputs` 和 `forward` 得到 Template Cache。 + +```mermaid +flowchart LR; + i@{shape: text, label: "Template Input"}-->p[process_inputs]; + subgraph TEMPLATE_MODEL + p[process_inputs]-->f[forward] + end + f[forward]-->c@{shape: text, label: "Template Cache"}; +``` + +在 Template 模型训练时,Template Input 不再是用户的输入,而是从数据集中获取,由 `TEMPLATE_DATA_PROCESSOR` 进行计算得到。 + +```mermaid +flowchart LR; + d@{shape: text, label: "Dataset"}-->dp[TEMPLATE_DATA_PROCESSOR]-->p[process_inputs]; + subgraph TEMPLATE_MODEL + p[process_inputs]-->f[forward] + end + f[forward]-->c@{shape: text, label: "Template Cache"}; +``` + +#### `TEMPLATE_MODEL` + +`TEMPLATE_MODEL` 是 Template 模型的代码实现,需继承 `torch.nn.Module`,并编写 `process_inputs` 与 `forward` 两个函数。`process_inputs` 与 `forward` 构成完整的 Template 模型推理过程,我们将其拆分为两部分,是为了在训练中更容易适配[两阶段拆分训练](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Training/Split_Training.html)。 + +* `process_inputs` 需带有装饰器 `@torch.no_grad()`,进行不包含梯度的计算 +* `forward` 需包含训练模型所需的全部梯度计算过程,其输入与 `process_inputs` 的输出相同 + +`process_inputs` 与 `forward` 需包含 `**kwargs`,保证兼容性,此外,我们提供了以下预留的参数 + +* 如需在 `process_inputs` 与 `forward` 中和基础模型 Pipeline 进行交互,例如调用基础模型 Pipeline 中的文本编码器进行计算,可在 `process_inputs` 与 `forward` 的输入参数中增加字段 `pipe` +* 如需在训练中启用 Gradient Checkpointing,可在 `forward` 的输入参数中增加字段 `use_gradient_checkpointing` 与 `use_gradient_checkpointing_offload` +* 多个 Template 模型需通过 `model_id` 区分 Template Inputs,请不要在 `process_inputs` 与 `forward` 的输入参数中使用这个字段 + +#### `TEMPLATE_MODEL_PATH`(可选项) + 
+`TEMPLATE_MODEL_PATH` 是模型预训练权重文件的相对路径,例如 + +```python +TEMPLATE_MODEL_PATH = "model.safetensors" +``` + +如需从多个模型文件中加载,可使用列表 + +```python +TEMPLATE_MODEL_PATH = [ + "model-00001-of-00003.safetensors", + "model-00002-of-00003.safetensors", + "model-00003-of-00003.safetensors", +] +``` + +如果需要随机初始化模型参数(模型还未训练),或不需要初始化模型参数,可将其设置为 `None`,或不设置 + +```python +TEMPLATE_MODEL_PATH = None +``` + +#### `TEMPLATE_DATA_PROCESSOR`(可选项) + +如需使用 DiffSynth-Studio 训练 Template 模型,则需构建训练数据集,数据集中的 `metadata.json` 包含 `template_inputs` 字段。`metadata.json` 中的 `template_inputs` 并不是直接输入给 Template 模型 `process_inputs` 的参数,而是提供给 `TEMPLATE_DATA_PROCESSOR` 的输入参数,由 `TEMPLATE_DATA_PROCESSOR` 计算出输入给 Template 模型 `process_inputs` 的参数。 + +例如,[DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness) 这一亮度控制模型的输入参数是 `scale`,即图像的亮度数值。`scale` 可以直接写在 `metadata.json` 中,此时 `TEMPLATE_DATA_PROCESSOR` 只需要传递参数: + +```json +[ + { + "image": "images/image_1.jpg", + "prompt": "a cat", + "template_inputs": {"scale": 0.2} + }, + { + "image": "images/image_2.jpg", + "prompt": "a dog", + "template_inputs": {"scale": 0.6} + } +] +``` + +```python +class DataProcessor: + def __call__(self, scale, **kwargs): + return {"scale": scale} + +TEMPLATE_DATA_PROCESSOR = DataProcessor +``` + +也可在 `metadata.json` 中填写图像路径,直接在训练过程中计算 `scale`。 + +```json +[ + { + "image": "images/image_1.jpg", + "prompt": "a cat", + "template_inputs": {"image": "/path/to/your/dataset/images/image_1.jpg"} + }, + { + "image": "images/image_2.jpg", + "prompt": "a dog", + "template_inputs": {"image": "/path/to/your/dataset/images/image_1.jpg"} + } +] +``` + +```python +class DataProcessor: + def __call__(self, image, **kwargs): + image = Image.open(image) + image = np.array(image) + return {"scale": image.astype(np.float32).mean() / 255} + +TEMPLATE_DATA_PROCESSOR = DataProcessor +``` + +### 训练 Template 模型 + +Template 模型“可训练”的充分条件是:Template Cache 中的变量计算与基础模型 Pipeline 完全解耦,这些变量在推理过程中输入给基础模型 
Pipeline 后,不会参与任何 Pipeline Unit 的计算,直达 `model_fn`。 + +如果 Template 模型是“可训练”的,那么可以使用 DiffSynth-Studio 进行训练,以基础模型 [black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B) 为例,在训练脚本中,填写字段: + +* `--extra_inputs`:额外输入,训练文生图模型的 Template 模型时只需填 `template_inputs`,训练图像编辑模型的 Template 模型时需填 `edit_image,template_inputs` +* `--template_model_id_or_path`:Template 模型的魔搭模型 ID 或本地路径,框架会优先匹配本地路径,若本地路径不存在则从魔搭模型库中下载该模型,填写模型 ID 时,以“:”结尾,例如 `"DiffSynth-Studio/Template-KleinBase4B-Brightness:"` +* `--remove_prefix_in_ckpt`:保存模型文件时,移除的 state dict 变量名前缀,填 `"pipe.template_model."` 即可 +* `--trainable_models`:可训练模型,填写 `template_model` 即可,若只需训练其中的某个组件,则需填写 `template_model.xxx,template_model.yyy`,以逗号分隔 + +以下是一个样例训练脚本,它会自动下载一个样例数据集,随机初始化模型权重后开始训练亮度控制模型: + +```shell +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Brightness/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "examples/flux2/model_training/scripts/brightness" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-Brightness_example" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters +``` + +### 与基础模型 Pipeline 组件交互 + +Diffusion Template 框架允许 Template 模型与基础模型 Pipeline 进行交互。例如,你可能需要使用基础模型 Pipeline 中的 text encoder 对文本进行编码,此时在 `process_inputs` 和 `forward` 中使用预留字段 `pipe` 即可。 + +```python +import torch + +class CustomizedTemplateModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.xxx = xxx() + + @torch.no_grad() + def process_inputs(self, text, pipe, **kwargs): + input_ids = pipe.tokenizer(text) + text_emb = pipe.text_encoder(text_emb) + return {"text_emb": text_emb} + + def forward(self, text_emb, pipe, **kwargs): + kv_cache = self.xxx(text_emb) + return {"kv_cache": kv_cache} + +TEMPLATE_MODEL = CustomizedTemplateModel +``` + +### 使用非训练的模型组件 + +在设计 Template 模型时,如果需要使用预训练的模型且不希望在训练过程中更新这部分参数,例如 + +```python +import torch + +class CustomizedTemplateModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.image_encoder = XXXEncoder.from_pretrained(xxx) + self.mlp = MLP() + + @torch.no_grad() + def process_inputs(self, image, **kwargs): + emb = self.image_encoder(image) + return {"emb": emb} + + def forward(self, emb, **kwargs): + kv_cache = self.mlp(emb) + return {"kv_cache": kv_cache} + +TEMPLATE_MODEL = CustomizedTemplateModel +``` + +此时需在训练命令中通过参数 `--trainable_models template_model.mlp` 设置为仅训练 `mlp` 部分。 + +### 上传 Template 模型 + +完成训练后,按照以下步骤可上传 Template 模型到魔搭社区 + +Step 1:在 `model.py` 中填入训练好的模型文件名,例如 + +```python +TEMPLATE_MODEL_PATH = "model.safetensors" +``` + +Step 2:使用以下命令上传 `model.py`,其中 `--token ms-xxx` 在 https://modelscope.cn/my/access/token 获取 + +```shell +modelscope upload user_name/your_model_id /path/to/your/model.py model.py --token ms-xxx +``` + +Step 3:确认模型文件 + +确认要上传的模型文件,例如 `epoch-1.safetensors`、`step-2000.safetensors`。 + +注意,DiffSynth-Studio 保存的模型文件中只包含可训练的参数,如果模型中包括非训练参数,则需要重新将非训练的模型参数打包才能进行推理,你可以通过以下代码进行打包: + +```python 
+from diffsynth.diffusion.template import load_template_model, load_state_dict +from safetensors.torch import save_file +import torch + +model = load_template_model("path/to/your/template/model", torch_dtype=torch.bfloat16, device="cpu") +state_dict = load_state_dict("path/to/your/ckpt/epoch-1.safetensors", torch_dtype=torch.bfloat16, device="cpu") +state_dict.update(model.state_dict()) +save_file(state_dict, "model.safetensors") +``` + +Step 4:上传模型文件 + +```shell +modelscope upload user_name/your_model_id /path/to/your/model/epoch-1.safetensors model.safetensors --token ms-xxx +``` + +Step 5:验证模型推理效果 + +```python +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +# Load base model +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +# Load Template model +template_pipeline = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="user_name/your_model_id") + ], +) +# Generate an image +image = template_pipeline( + pipe, + prompt="a cat", + seed=0, cfg_scale=4, + height=1024, width=1024, + template_inputs=[{xxx}], +) +image.save("image.png") +``` + diff --git a/docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md b/docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md new file mode 100644 index 000000000..622e6a933 --- /dev/null +++ 
b/docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md @@ -0,0 +1,61 @@ +# 理解 Diffusion Templates + +## 框架结构 + +Diffusion Templates 框架的结构如下图所示: + +```mermaid +flowchart TD; + subgraph Template Pipeline + si@{shape: text, label: "Template Input"}-->i1@{shape: text, label: "Template Input 1"}; + si@{shape: text, label: "Template Input"}-->i2@{shape: text, label: "Template Input 2"}; + si@{shape: text, label: "Template Input"}-->i3@{shape: text, label: "Template Input 3"}; + i1@{shape: text, label: "Template Input 1"}-->m1[Template Model 1]-->c1@{shape: text, label: "Template Cache 1"}; + i2@{shape: text, label: "Template Input 2"}-->m2[Template Model 2]-->c2@{shape: text, label: "Template Cache 2"}; + i3@{shape: text, label: "Template Input 3"}-->m3[Template Model 3]-->c3@{shape: text, label: "Template Cache 3"}; + c1-->c@{shape: text, label: "Template Cache"}; + c2-->c; + c3-->c; + end + i@{shape: text, label: "Model Input"}-->m[Diffusion Pipeline]-->o@{shape: text, label: "Model Output"}; + c-->m; +``` + +框架包含以下模块设计: + +* Template Input: Template 模型的输入。其格式为 Python 字典,其中的字段由每个 Template 模型自身决定,例如 `{"scale": 0.8}` +* Template Model: Template 模型,可从魔搭模型库加载(`ModelConfig(model_id="xxx/xxx")`)或从本地路径加载(`ModelConfig(path="xxx")`) +* Template Cache: Template 模型的输出。其格式为 Python 字典,其中的字段仅支持对应基础模型 Pipeline 中的输入参数字段。 +* Template Pipeline: 用于调度多个 Template 模型的模块。该模块负责加载 Template 模型、整合多个 Template 模型的输出 + +当 Diffusion Templates 框架未启用时,基础模型组件(包括 Text Encoder、DiT、VAE 等)被加载到 Diffusion Pipeline 中,输入 Model Input(包括 prompt、height、width 等),输出 Model Output(例如图像)。 + +当 Diffusion Templates 框架启用后,若干个 Template 模型被加载到 Template Pipeline 中,Template Pipeline 输出 Template Cache(Diffusion Pipeline 输入参数的子集),并交由 Diffusion Pipeline 进行后续的进一步处理。Template Pipeline 通过接管一部分 Diffusion Pipeline 的输入参数来实现可控生成。 + +## 模型能力媒介 + +注意到,Template Cache 的格式被定义为 Diffusion Pipeline 输入参数的子集,这是框架通用性设计的基本保证,我们限制 Template 模型的输入只能是 Diffusion Pipeline 的输入参数。因此,我们需要为 Diffusion Pipeline 设计额外的输入参数作为模型能力媒介。其中,KV-Cache 是非常适合 
Diffusion 的模型能力媒介 + +* 技术路线已经在 LLM Skills 上得到了验证,LLM 中输入的提示词也会被潜在地转化为 KV-Cache +* KV-Cache 具有 Diffusion 模型的“高权限”,在生图模型上能够直接影响甚至完全控制生图结果,这保证 Diffusion Template 模型具备足够高的能力上限 +* KV-Cache 可以直接在序列层面拼接,让多个 Template 模型同时生效 +* KV-Cache 在框架层面的开发量少,增加一个 Pipeline 的输入参数并穿透到模型内部即可,可以快速适配新的 Diffusion 基础模型 + +另外,还有以下媒介也可以用于 Template: + +* Residual:残差,在 ControlNet 中使用较多,适合做点对点的控制,和 KVCache 相比缺点是不能支持任意分辨率以及多个 Residual 融合时可能冲突 +* LoRA:不要把它当成模型的一部分,而是把它当成模型的输入参数,LoRA 本质上是一系列张量,也可以作为模型能力的媒介 + +**目前,我们仅在 FLUX.2 的 Pipeline 上提供了 KV-Cache 和 LoRA 作为 Template Cache 的支持,后续会考虑支持更多模型和更多模型能力媒介。** + +## Template 模型格式 + +一个 Template 模型的格式为: + +``` +Template_Model +├── model.py +└── model.safetensors +``` + +其中,`model.py` 是模型的入口,`model.safetensors` 是 Template 模型的权重文件。关于如何构建 Template 模型,请参考文档 [Template 模型训练](Template_Model_Training.md),或参考[现有的 Template 模型](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)。 diff --git a/docs/zh/Model_Details/FLUX2.md b/docs/zh/Model_Details/FLUX2.md index 66725e6c4..16f287238 100644 --- a/docs/zh/Model_Details/FLUX2.md +++ b/docs/zh/Model_Details/FLUX2.md @@ -66,6 +66,15 @@ image.save("image.jpg") |[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| 
|[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| |[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| 
+|[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| 特殊训练脚本: diff --git a/docs/zh/README.md b/docs/zh/README.md index 8cec5d6c7..e1c463b1c 100644 --- a/docs/zh/README.md +++ b/docs/zh/README.md @@ -16,8 +16,9 @@ graph LR; 我想要基于此框架进行二次开发-->sec5[Section 5: API 参考]; 我想要基于本项目探索新的技术-->sec4[Section 4: 模型接入]; 我想要基于本项目探索新的技术-->sec5[Section 5: API 参考]; - 我想要基于本项目探索新的技术-->sec6[Section 6: 学术导引]; - 我遇到了问题-->sec7[Section 7: 常见问题]; + 我想要基于本项目探索新的技术-->sec6[Section 6: Diffusion Templates] + 我想要基于本项目探索新的技术-->sec7[Section 7: 学术导引]; + 我遇到了问题-->sec8[Section 8: 常见问题]; ``` @@ -75,7 +76,15 @@ graph LR; * [`diffsynth.core.loader`](./API_Reference/core/loader.md): 模型下载与加载 * [`diffsynth.core.vram`](./API_Reference/core/vram.md): 显存管理 -## Section 6: 学术导引 +## Section 6: Diffusion Templates + +本节介绍 Diffusion 模型可控生成插件框架 Diffusion Templates,讲解 Diffusion Templates 框架的运行机制,展示如何使用 Template 模型进行推理和训练。 + +* [理解 Diffusion Templates](./Diffusion_Templates/Understanding_Diffusion_Templates.md) +* [Template 模型推理](./Diffusion_Templates/Template_Model_Inference.md) +* [Template 模型训练](./Diffusion_Templates/Template_Model_Training.md) + +## Section 7: 学术导引 本节介绍如何利用 `DiffSynth-Studio` 训练新的模型,帮助科研工作者探索新的模型技术。 @@ -84,7 +93,7 @@ graph LR; * 设计可控生成模型【coming soon】 * 创建新的训练范式【coming soon】 -## Section 7: 常见问题 +## Section 8: 常见问题 本节总结了开发者常见的问题,如果你在使用和开发中遇到了问题,请参考本节内容,如果仍无法解决,请到 GitHub 上给我们提 issue。 diff 
--git a/docs/zh/index.rst b/docs/zh/index.rst index 42256b3b5..8042013d3 100644 --- a/docs/zh/index.rst +++ b/docs/zh/index.rst @@ -60,6 +60,14 @@ API_Reference/core/loader API_Reference/core/vram +.. toctree:: + :maxdepth: 2 + :caption: Diffusion Templates + + Diffusion_Templates/Understanding_Diffusion_Templates.md + Diffusion_Templates/Template_Model_Inference.md + Diffusion_Templates/Template_Model_Training.md + .. toctree:: :maxdepth: 2 :caption: 学术导引 diff --git a/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py b/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py new file mode 100644 index 000000000..455a238bd --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py @@ -0,0 +1,52 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) # Important! 
+template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic")], +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 1.0, + "merge_type": "mean", + }], + negative_template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 1.0, + "merge_type": "mean", + }], +) +image.save("image_Aesthetic_1.0.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.5, + "merge_type": "mean", + }], + negative_template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.5, + "merge_type": "mean", + }], +) +image.save("image_Aesthetic_2.5.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py b/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py new file mode 100644 index 000000000..9a25f50d8 --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py @@ -0,0 +1,43 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) 
+template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness")], +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.7}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_light.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.5}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_normal.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.3}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_dark.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py b/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py new file mode 100644 index 000000000..d0c33a9ed --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py @@ -0,0 +1,54 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", 
origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ControlNet")], +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone, bathed in bright sunshine.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, bathed in bright sunshine.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "", + }], +) +image.save("image_ControlNet_sunshine.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone, surrounded by colorful magical particles.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, surrounded by colorful magical particles.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "", + }], +) +image.save("image_ControlNet_magic.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B-Edit.py b/examples/flux2/model_inference/Template-KleinBase4B-Edit.py new file mode 100644 index 000000000..e229f1c72 --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-Edit.py @@ -0,0 +1,54 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + 
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Edit")], +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="Put a hat on this cat.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "Put a hat on this cat.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "", + }], +) +image.save("image_Edit_hat.jpg") +image = template( + pipe, + prompt="Make the cat turn its head to look to the right.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "Make the cat turn its head to look to the right.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "", + }], +) +image.save("image_Edit_head.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py b/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py new file mode 100644 index 000000000..c5826376b --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py @@ -0,0 +1,56 @@ +from diffsynth.diffusion.template import 
TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint")], +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="An orange cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), + "force_inpaint": True, + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), + }], +) +image.save("image_Inpaint_1.jpg") +image = template( + pipe, + prompt="A cat wearing sunglasses is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_2.jpg"), + }], + negative_template_inputs = [{ + "image": 
Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_2.jpg"), + }], +) +image.save("image_Inpaint_2.jpg") + diff --git a/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py b/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py new file mode 100644 index 000000000..058816cdf --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py @@ -0,0 +1,43 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")], +) +image = template( + pipe, + prompt="A meme with a sleepy expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_sleepy.jpg") +image = template( + pipe, + prompt="A meme with a happy expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_happy.jpg") +image = template( + pipe, + prompt="A meme with a surprised expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + 
negative_template_inputs = [{}], +) +image.save("image_PandaMeme_surprised.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py b/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py new file mode 100644 index 000000000..e621bd771 --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py @@ -0,0 +1,35 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness")], +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.1}], + negative_template_inputs = [{"scale": 0.5}], +) +image.save("image_Sharpness_0.1.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.8}], + negative_template_inputs = [{"scale": 0.5}], +) +image.save("image_Sharpness_0.8.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py b/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py new file mode 100644 index 000000000..943ed6c30 --- /dev/null +++ 
b/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py @@ -0,0 +1,52 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-SoftRGB")], +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "R": 128/255, + "G": 128/255, + "B": 128/255 + }], +) +image.save("image_rgb_normal.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "R": 208/255, + "G": 185/255, + "B": 138/255 + }], +) +image.save("image_rgb_warm.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "R": 94/255, + "G": 163/255, + "B": 174/255 + }], +) +image.save("image_rgb_cold.jpg") diff --git a/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py b/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py new file mode 100644 index 000000000..d527ffb72 --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py @@ -0,0 +1,54 @@ +from 
diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Upscaler")], +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_512.jpg"), + "prompt": "A cat is sitting on a stone.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_512.jpg"), + "prompt": "", + }], +) +image.save("image_Upscaler_1.png") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_100.jpg"), + "prompt": "A cat is sitting on a stone.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_100.jpg"), + "prompt": "", + }], +) +image.save("image_Upscaler_2.png") diff --git 
a/examples/flux2/model_inference/Template-KleinBase4B.py b/examples/flux2/model_inference/Template-KleinBase4B.py deleted file mode 100644 index 5b2dd931f..000000000 --- a/examples/flux2/model_inference/Template-KleinBase4B.py +++ /dev/null @@ -1,256 +0,0 @@ -from diffsynth.diffusion.template import TemplatePipeline -from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig -import torch -from PIL import Image -import numpy as np - -def load_template_pipeline(model_ids): - template = TemplatePipeline.from_pretrained( - torch_dtype=torch.bfloat16, - device="cuda", - model_configs=[ModelConfig(model_id=model_id) for model_id in model_ids], - ) - return template - -# Base Model -pipe = Flux2ImagePipeline.from_pretrained( - torch_dtype=torch.bfloat16, - device="cuda", - model_configs=[ - ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), - ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), - ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), - ], - tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), -) -# image = pipe( -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# ) -# image.save("image_base.jpg") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Brightness"]) -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{"scale": 0.7}], -# negative_template_inputs = [{"scale": 0.5}] -# ) -# image.save("image_Brightness_light.jpg") -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{"scale": 0.5}], -# negative_template_inputs = [{"scale": 0.5}] -# ) -# 
image.save("image_Brightness_normal.jpg") -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{"scale": 0.3}], -# negative_template_inputs = [{"scale": 0.5}] -# ) -# image.save("image_Brightness_dark.jpg") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-ControlNet"]) -# image = template( -# pipe, -# prompt="A cat is sitting on a stone, bathed in bright sunshine.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "image": Image.open("data/assets/image_depth.jpg"), -# "prompt": "A cat is sitting on a stone, bathed in bright sunshine.", -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_depth.jpg"), -# "prompt": "", -# }], -# ) -# image.save("image_ControlNet_sunshine.jpg") -# image = template( -# pipe, -# prompt="A cat is sitting on a stone, surrounded by colorful magical particles.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "image": Image.open("data/assets/image_depth.jpg"), -# "prompt": "A cat is sitting on a stone, surrounded by colorful magical particles.", -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_depth.jpg"), -# "prompt": "", -# }], -# ) -# image.save("image_ControlNet_magic.jpg") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Edit"]) -# image = template( -# pipe, -# prompt="Put a hat on this cat.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "prompt": "Put a hat on this cat.", -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "prompt": "", -# }], -# ) -# image.save("image_Edit_hat.jpg") -# image = template( -# pipe, -# prompt="Make the cat turn its head to look to the right.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs 
= [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "prompt": "Make the cat turn its head to look to the right.", -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "prompt": "", -# }], -# ) -# image.save("image_Edit_head.jpg") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Upscaler"]) -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "image": Image.open("data/assets/image_lowres_512.jpg"), -# "prompt": "A cat is sitting on a stone.", -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_lowres_512.jpg"), -# "prompt": "", -# }], -# ) -# image.save("image_Upscaler_1.png") -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "image": Image.open("data/assets/image_lowres_100.jpg"), -# "prompt": "A cat is sitting on a stone.", -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_lowres_100.jpg"), -# "prompt": "", -# }], -# ) -# image.save("image_Upscaler_2.png") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-SoftRGB"]) -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "R": 128/255, -# "G": 128/255, -# "B": 128/255 -# }], -# ) -# image.save("image_rgb_normal.jpg") -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "R": 208/255, -# "G": 185/255, -# "B": 138/255 -# }], -# ) -# image.save("image_rgb_warm.jpg") -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "R": 94/255, -# "G": 163/255, -# "B": 174/255 -# }], -# ) 
-# image.save("image_rgb_cold.jpg") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-PandaMeme"]) -# image = template( -# pipe, -# prompt="A meme with a sleepy expression.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{}], -# negative_template_inputs = [{}], -# ) -# image.save("image_PandaMeme_sleepy.jpg") -# image = template( -# pipe, -# prompt="A meme with a happy expression.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{}], -# negative_template_inputs = [{}], -# ) -# image.save("image_PandaMeme_happy.jpg") -# image = template( -# pipe, -# prompt="A meme with a surprised expression.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{}], -# negative_template_inputs = [{}], -# ) -# image.save("image_PandaMeme_surprised.jpg") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Sharpness"]) -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{"scale": 0.1}], -# negative_template_inputs = [{"scale": 0.5}], -# ) -# image.save("image_Sharpness_0.1.jpg") -# image = template( -# pipe, -# prompt="A cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{"scale": 0.8}], -# negative_template_inputs = [{"scale": 0.5}], -# ) -# image.save("image_Sharpness_0.8.jpg") - -# template = load_template_pipeline(["DiffSynth-Studio/Template-KleinBase4B-Inpaint"]) -# image = template( -# pipe, -# prompt="An orange cat is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "mask": Image.open("data/assets/image_mask_1.jpg"), -# "force_inpaint": True, -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "mask": Image.open("data/assets/image_mask_1.jpg"), -# }], -# ) -# 
image.save("image_Inpaint_1.jpg") -# image = template( -# pipe, -# prompt="A cat wearing sunglasses is sitting on a stone.", -# seed=0, cfg_scale=4, num_inference_steps=50, -# template_inputs = [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "mask": Image.open("data/assets/image_mask_2.jpg"), -# }], -# negative_template_inputs = [{ -# "image": Image.open("data/assets/image_reference.jpg"), -# "mask": Image.open("data/assets/image_mask_2.jpg"), -# }], -# ) -# image.save("image_Inpaint_2.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py new file mode 100644 index 000000000..2c6f60a68 --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py @@ -0,0 +1,63 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + 
torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic")], + lazy_loading=True, +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 1.0, + "merge_type": "mean", + }], + negative_template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 1.0, + "merge_type": "mean", + }], +) +image.save("image_Aesthetic_1.0.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.5, + "merge_type": "mean", + }], + negative_template_inputs = [{ + "lora_ids": list(range(1, 180, 2)), + "lora_scales": 2.5, + "merge_type": "mean", + }], +) +image.save("image_Aesthetic_2.5.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py new file mode 100644 index 000000000..8210e66be --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py @@ -0,0 +1,55 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + 
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness")], + lazy_loading=True, +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.7}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_light.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.5}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_normal.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.3}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_dark.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py new file mode 100644 index 000000000..3f469de9c --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py @@ -0,0 +1,66 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", 
+ "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ControlNet")], + lazy_loading=True, +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone, bathed in bright sunshine.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, bathed in bright sunshine.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "", + }], +) +image.save("image_ControlNet_sunshine.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone, surrounded by colorful magical particles.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "A cat is sitting on a 
stone, surrounded by colorful magical particles.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "", + }], +) +image.save("image_ControlNet_magic.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py new file mode 100644 index 000000000..c63fb9b5a --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py @@ -0,0 +1,66 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Edit")], + lazy_loading=True, +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + 
allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="Put a hat on this cat.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "Put a hat on this cat.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "", + }], +) +image.save("image_Edit_hat.jpg") +image = template( + pipe, + prompt="Make the cat turn its head to look to the right.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "Make the cat turn its head to look to the right.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "", + }], +) +image.save("image_Edit_head.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py new file mode 100644 index 000000000..3106cbad2 --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py @@ -0,0 +1,68 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + 
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint")], + lazy_loading=True, +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="An orange cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), + "force_inpaint": True, + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), + }], +) +image.save("image_Inpaint_1.jpg") +image = template( + pipe, + prompt="A cat wearing sunglasses is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_2.jpg"), + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_2.jpg"), + }], +) +image.save("image_Inpaint_2.jpg") + diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py new file mode 
100644 index 000000000..3caa8e8d4 --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py @@ -0,0 +1,55 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")], + lazy_loading=True, +) +image = template( + pipe, + prompt="A meme with a sleepy expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_sleepy.jpg") +image = template( + pipe, + prompt="A meme with a happy expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_happy.jpg") +image = template( + pipe, + prompt="A meme with a surprised expression.", + seed=0, 
cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_surprised.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py new file mode 100644 index 000000000..042f1a563 --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py @@ -0,0 +1,47 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness")], + lazy_loading=True, +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.1}], + negative_template_inputs = [{"scale": 0.5}], +) 
+image.save("image_Sharpness_0.1.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.8}], + negative_template_inputs = [{"scale": 0.5}], +) +image.save("image_Sharpness_0.8.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py new file mode 100644 index 000000000..80c7ac80e --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py @@ -0,0 +1,64 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-SoftRGB")], + lazy_loading=True, +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, 
cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "R": 128/255, + "G": 128/255, + "B": 128/255 + }], +) +image.save("image_rgb_normal.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "R": 208/255, + "G": 185/255, + "B": 138/255 + }], +) +image.save("image_rgb_warm.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "R": 94/255, + "G": 163/255, + "B": 174/255 + }], +) +image.save("image_rgb_cold.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py new file mode 100644 index 000000000..d303cb2f2 --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py @@ -0,0 +1,66 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + 
tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Upscaler")], + lazy_loading=True, +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_512.jpg"), + "prompt": "A cat is sitting on a stone.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_512.jpg"), + "prompt": "", + }], +) +image.save("image_Upscaler_1.png") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_100.jpg"), + "prompt": "A cat is sitting on a stone.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_100.jpg"), + "prompt": "", + }], +) +image.save("image_Upscaler_2.png") diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh b/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh new file mode 100644 index 000000000..bee97718e --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh @@ -0,0 +1,19 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Aesthetic/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path 
data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Aesthetic \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Aesthetic/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Aesthetic:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --output_path "./models/train/Template-KleinBase4B-Aesthetic_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --enable_lora_hot_loading diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh b/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh new file mode 100644 index 000000000..2506a62f8 --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Brightness/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths 
"black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --output_path "./models/train/Template-KleinBase4B-Brightness_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh b/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh new file mode 100644 index 000000000..ee0d23bcc --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-ControlNet/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-ControlNet \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-ControlNet/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-ControlNet:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-ControlNet_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/full/Template-KleinBase4B.sh b/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh similarity index 52% rename from examples/flux2/model_training/full/Template-KleinBase4B.sh rename to examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh index 093f1ef79..536f963d0 100644 --- a/examples/flux2/model_training/full/Template-KleinBase4B.sh +++ b/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh @@ -1,17 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Edit/*" --local_dir ./data/diffsynth_example_dataset + accelerate launch examples/flux2/model_training/train.py \ - --dataset_base_path xxx \ - --dataset_metadata_path xxx/metadata.jsonl \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Edit \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Edit/metadata.jsonl \ --extra_inputs "template_inputs" \ --max_pixels 1048576 \ - --dataset_repeat 1 \ + --dataset_repeat 50 \ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ - --template_model_id_or_path "xxx" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Edit:" \ --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ --learning_rate 1e-4 \ - --num_epochs 999 \ + --num_epochs 2 \ --remove_prefix_in_ckpt "pipe.template_model." 
\ - --output_path "./models/train/Template-KleinBase4B_full" \ + --output_path "./models/train/Template-KleinBase4B-Edit_full" \ --trainable_models "template_model" \ - --save_steps 1000 \ --use_gradient_checkpointing \ --find_unused_parameters diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh b/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh new file mode 100644 index 000000000..19ddb676d --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Inpaint/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Inpaint \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Inpaint/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Inpaint:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-Inpaint_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh b/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh new file mode 100644 index 000000000..861eb5715 --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-PandaMeme/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-PandaMeme \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-PandaMeme/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-PandaMeme:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-PandaMeme_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh b/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh new file mode 100644 index 000000000..3afa3a24f --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Sharpness/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Sharpness \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Sharpness/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Sharpness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-Sharpness_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh b/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh new file mode 100644 index 000000000..add0ea8af --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-SoftRGB/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-SoftRGB \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-SoftRGB/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-SoftRGB:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-SoftRGB_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh b/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh new file mode 100644 index 000000000..aab063bc3 --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Upscaler/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Upscaler \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Upscaler/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Upscaler:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-Upscaler_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/scripts/brightness/model.py b/examples/flux2/model_training/scripts/brightness/model.py new file mode 100644 index 000000000..9be2cb0a4 --- /dev/null +++ b/examples/flux2/model_training/scripts/brightness/model.py @@ -0,0 +1,62 @@ +import torch, math +from PIL import Image +import numpy as np + + +class SingleValueEncoder(torch.nn.Module): + def __init__(self, dim_in=256, dim_out=4096, length=32): + super().__init__() + self.length = length + self.prefer_value_embedder = torch.nn.Sequential(torch.nn.Linear(dim_in, dim_out), torch.nn.SiLU(), torch.nn.Linear(dim_out, dim_out)) + self.positional_embedding = torch.nn.Parameter(torch.randn(self.length, dim_out)) + + def get_timestep_embedding(self, timesteps, embedding_dim, max_period=10000): + half_dim = embedding_dim // 2 + exponent = -math.log(max_period) * torch.arange(0, half_dim, dtype=torch.float32, device=timesteps.device) / half_dim + emb = timesteps[:, None].float() * torch.exp(exponent)[None, :] + emb = torch.cat([torch.cos(emb), torch.sin(emb)], dim=-1) + return emb + + def forward(self, value, dtype): + emb = self.get_timestep_embedding(value * 1000, 256).to(dtype) + emb = self.prefer_value_embedder(emb).squeeze(0) + base_embeddings = emb.expand(self.length, -1) + positional_embedding = self.positional_embedding.to(dtype=base_embeddings.dtype, device=base_embeddings.device) + learned_embeddings = base_embeddings + positional_embedding + return learned_embeddings + + +class ValueFormatModel(torch.nn.Module): + def __init__(self, num_double_blocks=5, num_single_blocks=20, dim=3072, num_heads=24, length=512): + super().__init__() + self.block_names = [f"double_{i}" for i in range(num_double_blocks)] + [f"single_{i}" for i in range(num_single_blocks)] + self.proj_k = torch.nn.ModuleDict({block_name: 
SingleValueEncoder(dim_out=dim, length=length) for block_name in self.block_names}) + self.proj_v = torch.nn.ModuleDict({block_name: SingleValueEncoder(dim_out=dim, length=length) for block_name in self.block_names}) + self.num_heads = num_heads + self.length = length + + @torch.no_grad() + def process_inputs(self, pipe, scale, **kwargs): + return {"value": torch.Tensor([scale]).to(dtype=pipe.torch_dtype, device=pipe.device)} + + def forward(self, value, **kwargs): + kv_cache = {} + for block_name in self.block_names: + k = self.proj_k[block_name](value, value.dtype) + k = k.view(1, self.length, self.num_heads, -1) + v = self.proj_v[block_name](value, value.dtype) + v = v.view(1, self.length, self.num_heads, -1) + kv_cache[block_name] = (k, v) + return {"kv_cache": kv_cache} + + +class DataAnnotator: + def __call__(self, image, **kwargs): + image = Image.open(image) + image = np.array(image) + return {"scale": image.astype(np.float32).mean() / 255} + + +TEMPLATE_MODEL = ValueFormatModel +TEMPLATE_MODEL_PATH = None # You should modify this parameter after training +TEMPLATE_DATA_PROCESSOR = DataAnnotator \ No newline at end of file diff --git a/examples/flux2/model_training/scripts/convert_base_model_to_skill_model.py b/examples/flux2/model_training/scripts/convert_base_model_to_template_model.py similarity index 100% rename from examples/flux2/model_training/scripts/convert_base_model_to_skill_model.py rename to examples/flux2/model_training/scripts/convert_base_model_to_template_model.py diff --git a/examples/flux2/model_training/special/split_training/FLUX.2-klein-base-4B_lora.sh b/examples/flux2/model_training/special/split_training/FLUX.2-klein-base-4B_lora.sh new file mode 100644 index 000000000..25751d1a0 --- /dev/null +++ b/examples/flux2/model_training/special/split_training/FLUX.2-klein-base-4B_lora.sh @@ -0,0 +1,34 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/FLUX.2-klein-base-4B/*" --local_dir 
./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/example_image_dataset \ + --dataset_metadata_path data/example_image_dataset/metadata.csv \ + --max_pixels 1048576 \ + --dataset_repeat 1 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 5 \ + --remove_prefix_in_ckpt "pipe.dit." \ + --output_path "./models/train/FLUX.2-klein-base-4B_lora_cache" \ + --lora_base_model "dit" \ + --lora_target_modules "to_q,to_k,to_v,to_out.0,add_q_proj,add_k_proj,add_v_proj,to_add_out,linear_in,linear_out,to_qkv_mlp_proj,single_transformer_blocks.0.attn.to_out,single_transformer_blocks.1.attn.to_out,single_transformer_blocks.2.attn.to_out,single_transformer_blocks.3.attn.to_out,single_transformer_blocks.4.attn.to_out,single_transformer_blocks.5.attn.to_out,single_transformer_blocks.6.attn.to_out,single_transformer_blocks.7.attn.to_out,single_transformer_blocks.8.attn.to_out,single_transformer_blocks.9.attn.to_out,single_transformer_blocks.10.attn.to_out,single_transformer_blocks.11.attn.to_out,single_transformer_blocks.12.attn.to_out,single_transformer_blocks.13.attn.to_out,single_transformer_blocks.14.attn.to_out,single_transformer_blocks.15.attn.to_out,single_transformer_blocks.16.attn.to_out,single_transformer_blocks.17.attn.to_out,single_transformer_blocks.18.attn.to_out,single_transformer_blocks.19.attn.to_out" \ + --lora_rank 32 \ + --use_gradient_checkpointing \ + --task "sft:data_process" + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path "./models/train/FLUX.2-klein-base-4B_lora_cache" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors" \ + 
--tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 5 \ + --remove_prefix_in_ckpt "pipe.dit." \ + --output_path "./models/train/FLUX.2-klein-base-4B_lora" \ + --lora_base_model "dit" \ + --lora_target_modules "to_q,to_k,to_v,to_out.0,add_q_proj,add_k_proj,add_v_proj,to_add_out,linear_in,linear_out,to_qkv_mlp_proj,single_transformer_blocks.0.attn.to_out,single_transformer_blocks.1.attn.to_out,single_transformer_blocks.2.attn.to_out,single_transformer_blocks.3.attn.to_out,single_transformer_blocks.4.attn.to_out,single_transformer_blocks.5.attn.to_out,single_transformer_blocks.6.attn.to_out,single_transformer_blocks.7.attn.to_out,single_transformer_blocks.8.attn.to_out,single_transformer_blocks.9.attn.to_out,single_transformer_blocks.10.attn.to_out,single_transformer_blocks.11.attn.to_out,single_transformer_blocks.12.attn.to_out,single_transformer_blocks.13.attn.to_out,single_transformer_blocks.14.attn.to_out,single_transformer_blocks.15.attn.to_out,single_transformer_blocks.16.attn.to_out,single_transformer_blocks.17.attn.to_out,single_transformer_blocks.18.attn.to_out,single_transformer_blocks.19.attn.to_out" \ + --lora_rank 32 \ + --use_gradient_checkpointing \ + --task "sft:train" diff --git a/examples/flux2/model_training/special/split_training/Template-KleinBase4B-Brightness.sh b/examples/flux2/model_training/special/split_training/Template-KleinBase4B-Brightness.sh new file mode 100644 index 000000000..b214595e6 --- /dev/null +++ b/examples/flux2/model_training/special/split_training/Template-KleinBase4B-Brightness.sh @@ -0,0 +1,36 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Brightness/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness \ + --dataset_metadata_path 
data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 1 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --output_path "./models/train/Template-KleinBase4B-Brightness_full_cache" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --task "sft:data_process" + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path "./models/train/Template-KleinBase4B-Brightness_full_cache" \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-Brightness_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --task "sft:train" diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py new file mode 100644 index 000000000..bdd66d544 --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py @@ -0,0 +1,55 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) # Important! 
+template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-Aesthetic_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +image = template( + pipe, + prompt="a bird with fire", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "lora_ids": [1], + "lora_scales": 1.0, + "merge_type": "mean", + }], + negative_template_inputs = [{ + "lora_ids": [1], + "lora_scales": 1.0, + "merge_type": "mean", + }], +) +image.save("image_Aesthetic_1.0.jpg") +image = template( + pipe, + prompt="a bird with fire", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "lora_ids": [1], + "lora_scales": 2.5, + "merge_type": "mean", + }], + negative_template_inputs = [{ + "lora_ids": [1], + "lora_scales": 2.5, + "merge_type": "mean", + }], +) +image.save("image_Aesthetic_2.5.jpg") diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py new file mode 100644 index 000000000..7701faf75 --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py @@ -0,0 +1,46 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", 
origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-Brightness_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.7}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_light.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.5}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_normal.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.3}], + negative_template_inputs = [{"scale": 0.5}] +) +image.save("image_Brightness_dark.jpg") diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py new file mode 100644 index 000000000..c12b977a0 --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py @@ -0,0 +1,57 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ 
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ControlNet")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-ControlNet_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone, bathed in bright sunshine.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, bathed in bright sunshine.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "", + }], +) +image.save("image_ControlNet_sunshine.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone, surrounded by colorful magical particles.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "A cat is sitting on a stone, surrounded by colorful magical particles.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_depth.jpg"), + "prompt": "", + }], +) +image.save("image_ControlNet_magic.jpg") diff --git 
a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py new file mode 100644 index 000000000..5e6d2b5ae --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py @@ -0,0 +1,57 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Edit")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-Edit_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="Put a hat on this cat.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "Put a hat on this cat.", + }], + negative_template_inputs = [{ + "image": 
Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "", + }], +) +image.save("image_Edit_hat.jpg") +image = template( + pipe, + prompt="Make the cat turn its head to look to the right.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "Make the cat turn its head to look to the right.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "prompt": "", + }], +) +image.save("image_Edit_head.jpg") diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py new file mode 100644 index 000000000..5b29df72b --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py @@ -0,0 +1,59 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint")], +) +state_dict = 
load_state_dict("./models/train/Template-KleinBase4B-Inpaint_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="An orange cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), + "force_inpaint": True, + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), + }], +) +image.save("image_Inpaint_1.jpg") +image = template( + pipe, + prompt="A cat wearing sunglasses is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_2.jpg"), + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_2.jpg"), + }], +) +image.save("image_Inpaint_2.jpg") + diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py new file mode 100644 index 000000000..ad457b375 --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py @@ -0,0 +1,46 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + 
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-PandaMeme_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +image = template( + pipe, + prompt="A meme with a sleepy expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_sleepy.jpg") +image = template( + pipe, + prompt="A meme with a happy expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_happy.jpg") +image = template( + pipe, + prompt="A meme with a surprised expression.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{}], + negative_template_inputs = [{}], +) +image.save("image_PandaMeme_surprised.jpg") diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py new file mode 100644 index 000000000..2a9f584cb --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py @@ -0,0 +1,38 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from 
diffsynth.core import load_state_dict +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-Sharpness_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.1}], + negative_template_inputs = [{"scale": 0.5}], +) +image.save("image_Sharpness_0.1.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{"scale": 0.8}], + negative_template_inputs = [{"scale": 0.5}], +) +image.save("image_Sharpness_0.8.jpg") diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py new file mode 100644 index 000000000..48865302f --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py @@ -0,0 +1,55 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import 
torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-SoftRGB")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-SoftRGB_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "R": 128/255, + "G": 128/255, + "B": 128/255 + }], +) +image.save("image_rgb_normal.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "R": 208/255, + "G": 185/255, + "B": 138/255 + }], +) +image.save("image_rgb_warm.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "R": 94/255, + "G": 163/255, + "B": 174/255 + }], +) +image.save("image_rgb_cold.jpg") diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py new file mode 100644 index 000000000..b19c25ccf --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py @@ -0,0 +1,57 @@ +from 
diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch +from modelscope import dataset_snapshot_download +from PIL import Image + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Upscaler")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-Upscaler_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_512.jpg"), + "prompt": "A cat is sitting on a stone.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_512.jpg"), + "prompt": "", + }], +) +image.save("image_Upscaler_1.png") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_100.jpg"), + "prompt": "A cat 
is sitting on a stone.", + }], + negative_template_inputs = [{ + "image": Image.open("data/examples/templates/image_lowres_100.jpg"), + "prompt": "", + }], +) +image.save("image_Upscaler_2.png") From 13f2618da2b678a3c7bdde61c9ddb976b5e4fd9d Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Mon, 20 Apr 2026 10:56:29 +0800 Subject: [PATCH 05/12] add a new model --- README.md | 1 + README_zh.md | 1 + .../Introducing_Diffusion_Templates.md | 67 ++++++++++++++++++ .../Template_Model_Training.md | 49 ++++++++++++- .../Understanding_Diffusion_Templates.md | 2 +- docs/en/Model_Details/FLUX2.md | 1 + docs/en/README.md | 3 +- docs/en/index.rst | 1 + .../Introducing_Diffusion_Templates.md | 68 +++++++++++++++++++ .../Template_Model_Training.md | 49 ++++++++++++- .../Understanding_Diffusion_Templates.md | 2 +- docs/zh/Model_Details/FLUX2.md | 1 + docs/zh/README.md | 3 +- docs/zh/index.rst | 1 + .../Template-KleinBase4B-ContentRef.py | 52 ++++++++++++++ .../Template-KleinBase4B-ContentRef.py | 63 +++++++++++++++++ .../full/Template-KleinBase4B-ContentRef.sh | 19 ++++++ examples/flux2/model_training/train.py | 2 +- .../Template-KleinBase4B-ContentRef.py | 55 +++++++++++++++ 19 files changed, 433 insertions(+), 7 deletions(-) create mode 100644 docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md create mode 100644 docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py diff --git a/README.md b/README.md index fb905d2b9..253ef872d 100644 --- a/README.md +++ b/README.md @@ -357,6 +357,7 @@ Example code for FLUX.2 is available at: [/examples/flux2/](/examples/flux2/) 
|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| diff --git a/README_zh.md b/README_zh.md index 5d5a4f7ce..bc66027ff 100644 --- a/README_zh.md +++ b/README_zh.md @@ -357,6 +357,7 @@ FLUX.2 的示例代码位于:[/examples/flux2/](/examples/flux2/) 
|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| diff --git a/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md b/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md new file mode 100644 index 000000000..f31775f36 --- /dev/null +++ b/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md @@ -0,0 +1,67 @@ +# Diffusion Templates + +Diffusion Templates is a controllable generation plugin framework for Diffusion models in DiffSynth-Studio, providing additional controllable generation capabilities for base models. 
+ +* Open Source Code: [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio) +* Technical Report: coming soon +* Documentation Reference + * Introducing Diffusion Templates: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Introducing_Diffusion_Templates.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Introducing_Diffusion_Templates.html) + * Diffusion Templates Architecture Details: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html) + * Template Model Inference: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Inference.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Inference.html) + * Template Model Training: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Training.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Training.html) +* Online Demo: [ModelScope Creative Space](https://modelscope.cn/studios/DiffSynth-Studio/Diffusion-Templates) +* Models: [Collection](https://modelscope.cn/collections/DiffSynth-Studio/KleinBase4B-Templates) + * Structure Control: [DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) + * Brightness Adjustment: [DiffSynth-Studio/Template-KleinBase4B-Brightness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness) + * Color Adjustment: [DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB) + * Image Editing: 
[DiffSynth-Studio/Template-KleinBase4B-Edit](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit) + * Super Resolution: [DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler) + * Sharpness Enhancement: [DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness) + * Aesthetic Alignment: [DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic) + * Inpainting: [DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint) + * Content Reference: [DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef) + * Panda Meme (Easter Egg Model): [DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme) +* Datasets: [Collection](https://modelscope.cn/collections/DiffSynth-Studio/ImagePulseV2--shujuji) + * [DiffSynth-Studio/ImagePulseV2-Edit-Inpaint](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint) + * [DiffSynth-Studio/ImagePulseV2-TextImage](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage) + * [DiffSynth-Studio/ImagePulseV2-Edit-Background](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Background) + * [DiffSynth-Studio/ImagePulseV2-Edit-Clothes](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Clothes) + * [DiffSynth-Studio/ImagePulseV2-Edit-Pose](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Pose) + * [DiffSynth-Studio/ImagePulseV2-Edit-Change](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Change) + * [DiffSynth-Studio/ImagePulseV2-Edit-AddRemove](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-AddRemove) + * 
[DiffSynth-Studio/ImagePulseV2-Edit-Upscale](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Upscale) + * [DiffSynth-Studio/ImagePulseV2-TextImage-Human](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-Human) + * [DiffSynth-Studio/ImagePulseV2-Edit-Crop](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Crop) + * [DiffSynth-Studio/ImagePulseV2-Edit-Light](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Light) + * [DiffSynth-Studio/ImagePulseV2-Edit-Structure](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Structure) + * [DiffSynth-Studio/ImagePulseV2-Edit-HumanFace](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-HumanFace) + * [DiffSynth-Studio/ImagePulseV2-Edit-Angle](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Angle) + * [DiffSynth-Studio/ImagePulseV2-Edit-Style](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Style) + * [DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution) + * [DiffSynth-Studio/ImagePulseV2-Edit-Merge](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Merge) + +## Model Gallery + +* Super Resolution + Sharpness Enhancement: Generate ultra-high-clarity images + +|Low Resolution Input|High Resolution Output| +|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_lowres_100.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Upscaler_Sharpness.png)| + +* Structure Control + Aesthetic Alignment + Sharpness Enhancement: Fully-armed ControlNet + +|Structure Control Image|Output Image| +|-|-| 
+|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Aesthetic_Sharpness.png)| + +* Structure Control + Image Editing + Color Adjustment: Artistic style creation at will + +|Structure Control Image|Editing Input Image|Output Image| +|-|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Edit_SoftRGB.png)| + +* Brightness Control + Image Editing + Inpainting: Transport elements across dimensions + +|Reference Image|Inpaint Region|Output Image| +|-|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_mask_1.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Brightness_Edit_Inpaint.png)| diff --git a/docs/en/Diffusion_Templates/Template_Model_Training.md b/docs/en/Diffusion_Templates/Template_Model_Training.md index d9f1d230f..b32b69cea 100644 --- a/docs/en/Diffusion_Templates/Template_Model_Training.md +++ b/docs/en/Diffusion_Templates/Template_Model_Training.md @@ -228,9 +228,56 @@ TEMPLATE_MODEL = CustomizedTemplateModel Set `--trainable_models template_model.mlp` to train only the MLP component. +### Training on Low VRAM Devices + +The framework supports splitting Template model training into two stages: the first stage performs gradient-free computation, and the second stage performs gradient updates. 
For more information, refer to the documentation: [Two-stage Split Training](https://diffsynth-studio-doc.readthedocs.io/en/latest/Training/Split_Training.html). Here's a sample script: + +```shell +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Brightness/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 1 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --output_path "./models/train/Template-KleinBase4B-Brightness_full_cache" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --task "sft:data_process" + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path "./models/train/Template-KleinBase4B-Brightness_full_cache" \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-Brightness_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --task "sft:train" +``` + +Two-stage split training can reduce VRAM requirements and improve training speed. The training process is lossless in precision, but requires significant disk space for storing cache files. + +To further reduce VRAM requirements, you can enable fp8 precision by adding the parameters `--fp8_models "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors"` and `--fp8_models "black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors"` to the two-stage training. Note that fp8 precision can only be enabled on non-trainable model components and introduces minor errors. + ### Uploading Template Models -After training, follow these steps to upload to ModelScope: +After training, follow these steps to upload Template models to ModelScope for wider distribution. 1. Set model path in `model.py`: ```python diff --git a/docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md b/docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md index 1da52a8fd..900d1115a 100644 --- a/docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md +++ b/docs/en/Diffusion_Templates/Understanding_Diffusion_Templates.md @@ -1,4 +1,4 @@ -# Understanding Diffusion Templates +# Diffusion Templates Architecture Details The Diffusion Templates framework is a controllable generation plugin framework in DiffSynth-Studio that provides additional controllable generation capabilities for Diffusion models. 
diff --git a/docs/en/Model_Details/FLUX2.md b/docs/en/Model_Details/FLUX2.md index 60128798b..ba1a754da 100644 --- a/docs/en/Model_Details/FLUX2.md +++ b/docs/en/Model_Details/FLUX2.md @@ -75,6 +75,7 @@ image.save("image.jpg") |[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| Special Training Scripts: diff --git a/docs/en/README.md b/docs/en/README.md index e36d21a31..1bd1b5ab7 100644 --- a/docs/en/README.md +++ b/docs/en/README.md @@ -82,7 +82,8 @@ This section introduces the independent core module `diffsynth.core` in `DiffSyn This section introduces the controllable generation plugin framework for Diffusion models, explaining the framework's operation mechanism and how to use Template models for inference and training. 
-* [Understanding Diffusion Templates](./Diffusion_Templates/Understanding_Diffusion_Templates.md) +* [Introducing Diffusion Templates](./Diffusion_Templates/Introducing_Diffusion_Templates.md) +* [Diffusion Templates Architecture Details](./Diffusion_Templates/Understanding_Diffusion_Templates.md) * [Template Model Inference](./Diffusion_Templates/Template_Model_Inference.md) * [Template Model Training](./Diffusion_Templates/Template_Model_Training.md) diff --git a/docs/en/index.rst b/docs/en/index.rst index 34c00b687..0e1eecba6 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -64,6 +64,7 @@ Welcome to DiffSynth-Studio's Documentation :maxdepth: 2 :caption: Diffusion Templates + Diffusion_Templates/Introducing_Diffusion_Templates.md Diffusion_Templates/Understanding_Diffusion_Templates.md Diffusion_Templates/Template_Model_Inference.md Diffusion_Templates/Template_Model_Training.md diff --git a/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md b/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md new file mode 100644 index 000000000..21795e18c --- /dev/null +++ b/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md @@ -0,0 +1,68 @@ +# Diffusion Templates + +Diffusion Templates 是 DiffSynth-Studio 中的 Diffusion 模型可控生成插件框架,可以为基础模型提供额外的可控生成能力。 + +* 开源代码:[DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio) +* 技术报告:coming soon +* 文档参考 + * Diffusion Templates 简介:[English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Introducing_Diffusion_Templates.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Introducing_Diffusion_Templates.html) + * Diffusion Templates 架构详解:[English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html) + * Template 模型推理:[English 
Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Inference.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Inference.html) + * Template 模型训练:[English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Training.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Training.html) +* 在线体验:[魔搭社区创空间](https://modelscope.cn/studios/DiffSynth-Studio/Diffusion-Templates) +* 模型:[合集](https://modelscope.cn/collections/DiffSynth-Studio/KleinBase4B-Templates) + * 结构控制:[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) + * 亮度调节:[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness) + * 色彩调节:[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB) + * 图像编辑:[DiffSynth-Studio/Template-KleinBase4B-Edit](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit) + * 超分辨率:[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler) + * 锐利激发:[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness) + * 美学对齐:[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic) + * 局部重绘:[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint) + * 内容参考:[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef) + * 
魔性熊猫(彩蛋模型):[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme) +* 数据集:[合集](https://modelscope.cn/collections/DiffSynth-Studio/ImagePulseV2--shujuji) + * [DiffSynth-Studio/ImagePulseV2-Edit-Inpaint](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint) + * [DiffSynth-Studio/ImagePulseV2-TextImage](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage) + * [DiffSynth-Studio/ImagePulseV2-Edit-Background](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Background) + * [DiffSynth-Studio/ImagePulseV2-Edit-Clothes](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Clothes) + * [DiffSynth-Studio/ImagePulseV2-Edit-Pose](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Pose) + * [DiffSynth-Studio/ImagePulseV2-Edit-Change](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Change) + * [DiffSynth-Studio/ImagePulseV2-Edit-AddRemove](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-AddRemove) + * [DiffSynth-Studio/ImagePulseV2-Edit-Upscale](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Upscale) + * [DiffSynth-Studio/ImagePulseV2-TextImage-Human](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-Human) + * [DiffSynth-Studio/ImagePulseV2-Edit-Crop](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Crop) + * [DiffSynth-Studio/ImagePulseV2-Edit-Light](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Light) + * [DiffSynth-Studio/ImagePulseV2-Edit-Structure](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Structure) + * [DiffSynth-Studio/ImagePulseV2-Edit-HumanFace](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-HumanFace) + * [DiffSynth-Studio/ImagePulseV2-Edit-Angle](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Angle) + * 
[DiffSynth-Studio/ImagePulseV2-Edit-Style](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Style) + * [DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution) + * [DiffSynth-Studio/ImagePulseV2-Edit-Merge](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Merge) + +## 模型效果一览 + +* 超分辨率 + 锐利激发:生成清晰度极高的图像 + +|低清晰度输入|高清晰度输出| +|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_lowres_100.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Upscaler_Sharpness.png)| + +* 结构控制 + 美学对齐 + 锐利激发:全副武装的 ControlNet + +|结构控制图|输出图| +|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Aesthetic_Sharpness.png)| + +* 结构控制 + 图像编辑 + 色彩调节:随心所欲的艺术风格创作 + +|结构控制图|编辑输入图|输出图| +|-|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Edit_SoftRGB.png)| + +* 亮度控制 + 图像编辑 + 局部重绘:让图中的部分元素跨越次元 + +|参考图|重绘区域|输出图| +|-|-|-| +|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_mask_1.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Brightness_Edit_Inpaint.png)| + diff --git a/docs/zh/Diffusion_Templates/Template_Model_Training.md 
b/docs/zh/Diffusion_Templates/Template_Model_Training.md index a45180db5..b3726ab42 100644 --- a/docs/zh/Diffusion_Templates/Template_Model_Training.md +++ b/docs/zh/Diffusion_Templates/Template_Model_Training.md @@ -239,9 +239,56 @@ TEMPLATE_MODEL = CustomizedTemplateModel 此时需在训练命令中通过参数 `--trainable_models template_model.mlp` 设置为仅训练 `mlp` 部分。 +### 在低显存的设备上训练 + +框架支持将 Template 模型的训练拆分为两阶段,第一阶段进行无梯度计算,第二阶段进行梯度更新,更多信息请参考文档:[两阶段拆分训练](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Training/Split_Training.html),以下是样例脚本: + +```shell +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Brightness/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Brightness/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 1 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-Brightness_full_cache" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --task "sft:data_process" + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path "./models/train/Template-KleinBase4B-Brightness_full_cache" \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Brightness:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --output_path "./models/train/Template-KleinBase4B-Brightness_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --task "sft:train" +``` + +两阶段拆分训练可以降低显存需求,提高训练速度,训练过程是无损精度的,但需要较大硬盘空间用于存储 Cache 文件。 + +如需进一步减少显存需求,可开启 fp8 精度,在两阶段训练中添加参数 `--fp8_models "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors"` 和 `--fp8_models "black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors"` 即可,fp8 精度只能在非训练模型组件上启用,且存在少量误差。 + ### 上传 Template 模型 -完成训练后,按照以下步骤可上传 Template 模型到魔搭社区 +完成训练后,按照以下步骤可上传 Template 模型到魔搭社区,供更多人下载使用。 Step 1:在 `model.py` 中填入训练好的模型文件名,例如 diff --git a/docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md b/docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md index 622e6a933..183a33805 100644 --- a/docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md +++ b/docs/zh/Diffusion_Templates/Understanding_Diffusion_Templates.md @@ -1,4 +1,4 @@ -# 理解 Diffusion Templates +# Diffusion Templates 架构详解 ## 框架结构 diff --git a/docs/zh/Model_Details/FLUX2.md b/docs/zh/Model_Details/FLUX2.md index 16f287238..13381a5fa 
100644 --- a/docs/zh/Model_Details/FLUX2.md +++ b/docs/zh/Model_Details/FLUX2.md @@ -75,6 +75,7 @@ image.save("image.jpg") |[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| 特殊训练脚本: diff --git a/docs/zh/README.md b/docs/zh/README.md index e1c463b1c..b1d1acea0 100644 --- a/docs/zh/README.md +++ b/docs/zh/README.md @@ -80,7 +80,8 @@ graph LR; 本节介绍 Diffusion 模型可控生成插件框架 Diffusion Templates,讲解 Diffusion Templates 框架的运行机制,展示如何使用 Template 模型进行推理和训练。 -* [理解 Diffusion Templates](./Diffusion_Templates/Understanding_Diffusion_Templates.md) +* [Diffusion Templates 简介](./Diffusion_Templates/Introducing_Diffusion_Templates.md) +* [Diffusion Templates 架构详解](./Diffusion_Templates/Understanding_Diffusion_Templates.md) * [Template 模型推理](./Diffusion_Templates/Template_Model_Inference.md) * [Template 
模型训练](./Diffusion_Templates/Template_Model_Training.md) diff --git a/docs/zh/index.rst b/docs/zh/index.rst index 8042013d3..b3caee76f 100644 --- a/docs/zh/index.rst +++ b/docs/zh/index.rst @@ -64,6 +64,7 @@ :maxdepth: 2 :caption: Diffusion Templates + Diffusion_Templates/Introducing_Diffusion_Templates.md Diffusion_Templates/Understanding_Diffusion_Templates.md Diffusion_Templates/Template_Model_Inference.md Diffusion_Templates/Template_Model_Training.md diff --git a/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py b/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py new file mode 100644 index 000000000..839e16cd6 --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py @@ -0,0 +1,52 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image +import numpy as np + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) # Important! 
+template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ContentRef")], +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_style_1.jpg"), + }], + negative_template_inputs = [{ + "image": Image.fromarray(np.zeros((1024, 1024, 3), dtype=np.uint8) + 128), + }], +) +image.save("image_ContentRef_1.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_style_2.jpg"), + }], + negative_template_inputs = [{ + "image": Image.fromarray(np.zeros((1024, 1024, 3), dtype=np.uint8) + 128), + }], +) +image.save("image_ContentRef_2.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py new file mode 100644 index 000000000..6c0dc13c9 --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py @@ -0,0 +1,63 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +from modelscope import dataset_snapshot_download +from PIL import Image +import numpy as np + +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + 
device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ContentRef")], + lazy_loading=True, +) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_style_1.jpg"), + }], + negative_template_inputs = [{ + "image": Image.fromarray(np.zeros((1024, 1024, 3), dtype=np.uint8) + 128), + }], +) +image.save("image_ContentRef_1.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_style_2.jpg"), + }], + negative_template_inputs = [{ + "image": Image.fromarray(np.zeros((1024, 1024, 3), dtype=np.uint8) + 128), + }], +) +image.save("image_ContentRef_2.jpg") diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh b/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh new file mode 100644 index 000000000..52d3c2d60 --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh @@ -0,0 +1,19 @@ 
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-ContentRef/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-ContentRef \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-ContentRef/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + --dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-ContentRef:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." 
\ + --output_path "./models/train/Template-KleinBase4B-ContentRef_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters \ + --enable_lora_hot_loading diff --git a/examples/flux2/model_training/train.py b/examples/flux2/model_training/train.py index 144f41ea1..a1af88921 100644 --- a/examples/flux2/model_training/train.py +++ b/examples/flux2/model_training/train.py @@ -29,7 +29,7 @@ def __init__( tokenizer_config = self.parse_path_or_model_id(tokenizer_path, default_value=ModelConfig(model_id="black-forest-labs/FLUX.2-dev", origin_file_pattern="tokenizer/")) self.pipe = Flux2ImagePipeline.from_pretrained(torch_dtype=torch.bfloat16, device=device, model_configs=model_configs, tokenizer_config=tokenizer_config) self.pipe = self.load_training_template_model(self.pipe, template_model_id_or_path, args.use_gradient_checkpointing, args.use_gradient_checkpointing_offload) - self.pipe = self.split_pipeline_units(task, self.pipe, trainable_models, lora_base_model) + self.pipe = self.split_pipeline_units(task, self.pipe, trainable_models, lora_base_model, remove_unnecessary_params=True) if enable_lora_hot_loading: self.pipe.dit = self.pipe.enable_lora_hot_loading(self.pipe.dit) # Training mode diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py new file mode 100644 index 000000000..cdb9405e1 --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py @@ -0,0 +1,55 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch +from modelscope import dataset_snapshot_download +from PIL import Image +import numpy as np + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + 
ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) # Important! +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ContentRef")], +) +state_dict = load_state_dict("./models/train/Template-KleinBase4B-ContentRef_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +dataset_snapshot_download( + "DiffSynth-Studio/examples_in_diffsynth", + allow_file_pattern=["templates/*"], + local_dir="data/examples", +) +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_style_1.jpg"), + }], + negative_template_inputs = [{ + "image": Image.fromarray(np.zeros((1024, 1024, 3), dtype=np.uint8) + 128), + }], +) +image.save("image_ContentRef_1.jpg") +image = template( + pipe, + prompt="A cat is sitting on a stone.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs = [{ + "image": Image.open("data/examples/templates/image_style_2.jpg"), + }], + negative_template_inputs = [{ + "image": Image.fromarray(np.zeros((1024, 1024, 3), dtype=np.uint8) + 128), + }], +) +image.save("image_ContentRef_2.jpg") From b51fac3e0e3d3ef65dfe2f98ef250636242ef558 Mon Sep 17 00:00:00 2001 From: mi804 <1576993271@qq.com> Date: Mon, 20 Apr 2026 11:41:20 +0800 Subject: [PATCH 06/12] template age --- README.md | 31 +++++++------- 
README_zh.md | 31 +++++++------- docs/en/Model_Details/FLUX2.md | 1 + docs/zh/Model_Details/FLUX2.md | 1 + .../Template-KleinBase4B-Age.py | 31 ++++++++++++++ .../Template-KleinBase4B-Age.py | 42 +++++++++++++++++++ .../full/Template-KleinBase4B-Age.sh | 18 ++++++++ .../validate_full/Template-KleinBase4B-Age.py | 33 +++++++++++++++ 8 files changed, 158 insertions(+), 30 deletions(-) create mode 100644 examples/flux2/model_inference/Template-KleinBase4B-Age.py create mode 100644 examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py create mode 100644 examples/flux2/model_training/full/Template-KleinBase4B-Age.sh create mode 100644 examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py diff --git a/README.md b/README.md index 253ef872d..78d81cc66 100644 --- a/README.md +++ b/README.md @@ -343,21 +343,22 @@ Example code for FLUX.2 is available at: [/examples/flux2/](/examples/flux2/) | Model ID | Inference | Low-VRAM Inference | Full Training | Full Training Validation | LoRA Training | LoRA Training Validation | |-|-|-|-|-|-|-| -|[black-forest-labs/FLUX.2-dev](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-dev)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-dev.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-dev.py)|-|-|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-dev.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-dev.py)| 
-|[black-forest-labs/FLUX.2-klein-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-4B.py)| -|[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| 
-|[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| -|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| 
-|[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)|-|-| -|[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)|-|-| 
-|[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)|-|-| -|[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)|-|-| 
-|[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)|-|-| -|[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py)|-|-| 
-|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| -|[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| 
-|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| -|[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| +|[black-forest-labs/FLUX.2-dev](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-dev)|[code](/examples/flux2/model_inference/FLUX.2-dev.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-dev.py)|-|-|[code](/examples/flux2/model_training/lora/FLUX.2-dev.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-dev.py)| 
+|[black-forest-labs/FLUX.2-klein-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-4B)|[code](/examples/flux2/model_inference/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-4B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-4B.py)| +|[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| +|[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| 
+|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| +|[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Age](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Age)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Age.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Age.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[code](/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| diff --git a/README_zh.md b/README_zh.md index bc66027ff..fbe48939b 100644 --- a/README_zh.md +++ b/README_zh.md @@ -343,21 +343,22 @@ FLUX.2 
的示例代码位于:[/examples/flux2/](/examples/flux2/) |模型 ID|推理|低显存推理|全量训练|全量训练后验证|LoRA 训练|LoRA 训练后验证| |-|-|-|-|-|-|-| -|[black-forest-labs/FLUX.2-dev](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-dev)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-dev.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-dev.py)|-|-|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-dev.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-dev.py)| -|[black-forest-labs/FLUX.2-klein-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-4B.py)| 
-|[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| -|[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| 
-|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| -|[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)|-|-| 
-|[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)|-|-| -|[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)|-|-| 
-|[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)|-|-| -|[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)|-|-| 
-|[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py)|-|-| -|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| 
-|[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| -|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| 
-|[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| +|[black-forest-labs/FLUX.2-dev](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-dev)|[code](/examples/flux2/model_inference/FLUX.2-dev.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-dev.py)|-|-|[code](/examples/flux2/model_training/lora/FLUX.2-dev.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-dev.py)| +|[black-forest-labs/FLUX.2-klein-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-4B)|[code](/examples/flux2/model_inference/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-4B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-4B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-4B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-4B.py)| 
+|[black-forest-labs/FLUX.2-klein-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-9B)|[code](/examples/flux2/model_inference/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-9B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-9B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-9B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-9B.py)| +|[black-forest-labs/FLUX.2-klein-base-4B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-4B)|[code](/examples/flux2/model_inference/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-base-4B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-base-4B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-base-4B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-4B.py)| +|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| 
+|[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Age](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Age)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Age.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Age.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[code](/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)|-|-| diff --git a/docs/en/Model_Details/FLUX2.md b/docs/en/Model_Details/FLUX2.md index ba1a754da..63736eb35 100644 --- a/docs/en/Model_Details/FLUX2.md +++ b/docs/en/Model_Details/FLUX2.md @@ -68,6 +68,7 @@ image.save("image.jpg") 
|[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| |[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)|-|-| +|[DiffSynth-Studio/Template-KleinBase4B-Age](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Age)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Age.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Age.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)|-|-| diff --git a/docs/zh/Model_Details/FLUX2.md b/docs/zh/Model_Details/FLUX2.md index 13381a5fa..c3b85e500 100644 --- a/docs/zh/Model_Details/FLUX2.md +++ b/docs/zh/Model_Details/FLUX2.md @@ -68,6 +68,7 @@ image.save("image.jpg") |[black-forest-labs/FLUX.2-klein-base-9B](https://www.modelscope.cn/models/black-forest-labs/FLUX.2-klein-base-9B)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/FLUX.2-klein-base-9B.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/lora/FLUX.2-klein-base-9B.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_lora/FLUX.2-klein-base-9B.py)| 
|[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)|-|-| 
+|[DiffSynth-Studio/Template-KleinBase4B-Age](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Age)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Age.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Age.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)|-|-| |[DiffSynth-Studio/Template-KleinBase4B-Edit](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)|-|-| 
|[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://www.modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)|-|-| diff --git a/examples/flux2/model_inference/Template-KleinBase4B-Age.py b/examples/flux2/model_inference/Template-KleinBase4B-Age.py new file mode 100644 index 000000000..e01268e1b --- /dev/null +++ b/examples/flux2/model_inference/Template-KleinBase4B-Age.py @@ -0,0 +1,31 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Age")], +) +prompt = "Half body color photograph of a single woman, head and torso with visible arms and hands resting gently in front of the body, 
looking directly at the camera, centered composition, colorful studio background with soft gradient of warm pastel tones, vibrant studio lighting, wearing a plain red short-sleeve t-shirt, straight black shoulder-length hair, photorealistic, high quality"# prompt = "Full body photograph of a single woman standing, looking directly at the camera, centered composition, plain neutral gray background, soft even studio lighting, wearing a plain white short-sleeve t-shirt and blue jeans, barefoot, arms resting naturally at sides, straight black shoulder-length hair, photorealistic, high quality" +negative_age = 45 +for age in range(10, 91, 5): + print(f"Generating age {age}...") + image = template( + pipe, + prompt=prompt, + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs=[{"age": age}], + negative_template_inputs=[{"age": negative_age}], + ) + image.save(f"image_age_{age}.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py new file mode 100644 index 000000000..8cc6342ec --- /dev/null +++ b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py @@ -0,0 +1,42 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +import torch +vram_config = { + "offload_dtype": "disk", + "offload_device": "disk", + "onload_dtype": torch.float8_e4m3fn, + "onload_device": "cpu", + "preparing_dtype": torch.float8_e4m3fn, + "preparing_device": "cuda", + "computation_dtype": torch.bfloat16, + "computation_device": "cuda", +} +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", 
**vram_config), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), + vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5, +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Age")], + lazy_loading=True, +) +prompt = "Half body color photograph of a single woman, head and torso with visible arms and hands resting gently in front of the body, looking directly at the camera, centered composition, colorful studio background with soft gradient of warm pastel tones, vibrant studio lighting, wearing a plain red short-sleeve t-shirt, straight black shoulder-length hair, photorealistic, high quality" +negative_age = 45 +for age in range(10, 91, 5): + print(f"Generating age {age}...") + image = template( + pipe, + prompt=prompt, + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs=[{"age": age}], + negative_template_inputs=[{"age": negative_age}], + ) + image.save(f"image_age_{age}.jpg") diff --git a/examples/flux2/model_training/full/Template-KleinBase4B-Age.sh b/examples/flux2/model_training/full/Template-KleinBase4B-Age.sh new file mode 100644 index 000000000..d90301408 --- /dev/null +++ b/examples/flux2/model_training/full/Template-KleinBase4B-Age.sh @@ -0,0 +1,18 @@ +modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-Age/*" --local_dir ./data/diffsynth_example_dataset + +accelerate launch examples/flux2/model_training/train.py \ + --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Age \ + --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-Age/metadata.jsonl \ + --extra_inputs "template_inputs" \ + --max_pixels 1048576 \ + 
--dataset_repeat 50 \ + --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \ + --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-Age:" \ + --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \ + --learning_rate 1e-4 \ + --num_epochs 2 \ + --remove_prefix_in_ckpt "pipe.template_model." \ + --output_path "./models/train/Template-KleinBase4B-Age_full" \ + --trainable_models "template_model" \ + --use_gradient_checkpointing \ + --find_unused_parameters diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py new file mode 100644 index 000000000..725e238ae --- /dev/null +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py @@ -0,0 +1,33 @@ +from diffsynth.diffusion.template import TemplatePipeline +from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from diffsynth.core import load_state_dict +import torch + +pipe = Flux2ImagePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"), + ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"), + ], + tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"), +) +template = TemplatePipeline.from_pretrained( + torch_dtype=torch.bfloat16, + device="cuda", + model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Age")], +) +state_dict = 
load_state_dict("./models/train/Template-KleinBase4B-Age_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) +template.models[0].load_state_dict(state_dict) +prompt = "Half body color photograph of a single woman, head and torso with visible arms and hands resting gently in front of the body, looking directly at the camera, centered composition, colorful studio background with soft gradient of warm pastel tones, vibrant studio lighting, wearing a plain red short-sleeve t-shirt, straight black shoulder-length hair, photorealistic, high quality"# prompt = "Full body photograph of a single woman standing, looking directly at the camera, centered composition, plain neutral gray background, soft even studio lighting, wearing a plain white short-sleeve t-shirt and blue jeans, barefoot, arms resting naturally at sides, straight black shoulder-length hair, photorealistic, high quality" +negative_age = 45 +for age in [10, 35, 70]: + print(f"Generating age {age}...") + image = template( + pipe, + prompt=prompt, + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs=[{"age": age}], + ) + image.save(f"image_age_{age}.jpg") From 5e10e11dfc25e557cb3cee1773d94e8e193ad9cc Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Mon, 20 Apr 2026 14:31:26 +0800 Subject: [PATCH 07/12] refine code --- .../Template-KleinBase4B-Age.py | 36 ++++++++++++------ .../Template-KleinBase4B-Age.py | 37 +++++++++++++------ 2 files changed, 49 insertions(+), 24 deletions(-) diff --git a/examples/flux2/model_inference/Template-KleinBase4B-Age.py b/examples/flux2/model_inference/Template-KleinBase4B-Age.py index e01268e1b..f1d1bef67 100644 --- a/examples/flux2/model_inference/Template-KleinBase4B-Age.py +++ b/examples/flux2/model_inference/Template-KleinBase4B-Age.py @@ -17,15 +17,27 @@ device="cuda", model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Age")], ) -prompt = "Half body color photograph of a single woman, head and torso with visible arms and hands resting 
gently in front of the body, looking directly at the camera, centered composition, colorful studio background with soft gradient of warm pastel tones, vibrant studio lighting, wearing a plain red short-sleeve t-shirt, straight black shoulder-length hair, photorealistic, high quality"# prompt = "Full body photograph of a single woman standing, looking directly at the camera, centered composition, plain neutral gray background, soft even studio lighting, wearing a plain white short-sleeve t-shirt and blue jeans, barefoot, arms resting naturally at sides, straight black shoulder-length hair, photorealistic, high quality" -negative_age = 45 -for age in range(10, 91, 5): - print(f"Generating age {age}...") - image = template( - pipe, - prompt=prompt, - seed=0, cfg_scale=4, num_inference_steps=50, - template_inputs=[{"age": age}], - negative_template_inputs=[{"age": negative_age}], - ) - image.save(f"image_age_{age}.jpg") +image = template( + pipe, + prompt="A portrait of a woman with black hair, wearing a suit.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs=[{"age": 20}], + negative_template_inputs=[{"age": 45}], +) +image.save(f"image_age_20.jpg") +image = template( + pipe, + prompt="A portrait of a woman with black hair, wearing a suit.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs=[{"age": 50}], + negative_template_inputs=[{"age": 45}], +) +image.save(f"image_age_50.jpg") +image = template( + pipe, + prompt="A portrait of a woman with black hair, wearing a suit.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs=[{"age": 80}], + negative_template_inputs=[{"age": 45}], +) +image.save(f"image_age_80.jpg") diff --git a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py index 8cc6342ec..6fbbdf0ab 100644 --- a/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py +++ 
b/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py @@ -1,6 +1,7 @@ from diffsynth.diffusion.template import TemplatePipeline from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig import torch + vram_config = { "offload_dtype": "disk", "offload_device": "disk", @@ -28,15 +29,27 @@ model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Age")], lazy_loading=True, ) -prompt = "Half body color photograph of a single woman, head and torso with visible arms and hands resting gently in front of the body, looking directly at the camera, centered composition, colorful studio background with soft gradient of warm pastel tones, vibrant studio lighting, wearing a plain red short-sleeve t-shirt, straight black shoulder-length hair, photorealistic, high quality" -negative_age = 45 -for age in range(10, 91, 5): - print(f"Generating age {age}...") - image = template( - pipe, - prompt=prompt, - seed=0, cfg_scale=4, num_inference_steps=50, - template_inputs=[{"age": age}], - negative_template_inputs=[{"age": negative_age}], - ) - image.save(f"image_age_{age}.jpg") +image = template( + pipe, + prompt="A portrait of a woman with black hair, wearing a suit.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs=[{"age": 20}], + negative_template_inputs=[{"age": 45}], +) +image.save(f"image_age_20.jpg") +image = template( + pipe, + prompt="A portrait of a woman with black hair, wearing a suit.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs=[{"age": 50}], + negative_template_inputs=[{"age": 45}], +) +image.save(f"image_age_50.jpg") +image = template( + pipe, + prompt="A portrait of a woman with black hair, wearing a suit.", + seed=0, cfg_scale=4, num_inference_steps=50, + template_inputs=[{"age": 80}], + negative_template_inputs=[{"age": 45}], +) +image.save(f"image_age_80.jpg") From 89cb3f5b5d23669f5b751be2b87ab478ff4c5610 Mon Sep 17 00:00:00 2001 From: mi804 <1576993271@qq.com> Date: Mon, 20 Apr 
2026 15:20:23 +0800 Subject: [PATCH 08/12] minor fix age --- .../model_training/validate_full/Template-KleinBase4B-Age.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py index 725e238ae..d250cdefc 100644 --- a/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py +++ b/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py @@ -20,7 +20,7 @@ ) state_dict = load_state_dict("./models/train/Template-KleinBase4B-Age_full/epoch-1.safetensors", torch_dtype=torch.bfloat16) template.models[0].load_state_dict(state_dict) -prompt = "Half body color photograph of a single woman, head and torso with visible arms and hands resting gently in front of the body, looking directly at the camera, centered composition, colorful studio background with soft gradient of warm pastel tones, vibrant studio lighting, wearing a plain red short-sleeve t-shirt, straight black shoulder-length hair, photorealistic, high quality"# prompt = "Full body photograph of a single woman standing, looking directly at the camera, centered composition, plain neutral gray background, soft even studio lighting, wearing a plain white short-sleeve t-shirt and blue jeans, barefoot, arms resting naturally at sides, straight black shoulder-length hair, photorealistic, high quality" +prompt = "Half body color photograph of a single woman, head and torso with visible arms and hands resting gently in front of the body, looking directly at the camera, centered composition, colorful studio background with soft gradient of warm pastel tones, vibrant studio lighting, wearing a plain red short-sleeve t-shirt, straight black shoulder-length hair, photorealistic, high quality" negative_age = 45 for age in [10, 35, 70]: print(f"Generating age {age}...") From c1e25e65bbc9137be4c40ae16d83688b5d4aa0cf Mon Sep 17 00:00:00 2001 From: 
Artiprocher Date: Tue, 21 Apr 2026 15:46:53 +0800 Subject: [PATCH 09/12] update docs --- .../Introducing_Diffusion_Templates.md | 1 + .../Template_Model_Inference.md | 41 ++++++++++--------- .../Template_Model_Training.md | 2 +- .../Introducing_Diffusion_Templates.md | 1 + .../Template_Model_Inference.md | 39 ++++++++++-------- .../Template_Model_Training.md | 2 +- 6 files changed, 47 insertions(+), 39 deletions(-) diff --git a/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md b/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md index f31775f36..b4b0afdb0 100644 --- a/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md +++ b/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md @@ -20,6 +20,7 @@ Diffusion Templates is a controllable generation plugin framework for Diffusion * Aesthetic Alignment: [DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic) * Inpainting: [DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint) * Content Reference: [DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef) + * Age Control: [DiffSynth-Studio/Template-KleinBase4B-Age](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Age) * Panda Meme (Easter Egg Model): [DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme) * Datasets: [Collection](https://modelscope.cn/collections/DiffSynth-Studio/ImagePulseV2--shujuji) * [DiffSynth-Studio/ImagePulseV2-Edit-Inpaint](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint) diff --git a/docs/en/Diffusion_Templates/Template_Model_Inference.md b/docs/en/Diffusion_Templates/Template_Model_Inference.md index 8e1a0b022..596d2b9df 100644 ---
a/docs/en/Diffusion_Templates/Template_Model_Inference.md +++ b/docs/en/Diffusion_Templates/Template_Model_Inference.md @@ -29,7 +29,7 @@ image = pipe( image.save("image.png") ``` -The Template model [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness) can control image brightness during generation. Through the `TemplatePipeline` model, it can be loaded from ModelScope (via `ModelConfig(model_id="xxx/xxx")`) or from a local path (via `ModelConfig(path="xxx")`). Inputting `scale=0.8` increases image brightness. Note that in the code, input parameters for `pipe` must be transferred to `template_pipeline`, and `template_inputs` should be added. +The Template model [DiffSynth-Studio/Template-KleinBase4B-Brightness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness) can control image brightness during generation. Through the `TemplatePipeline` model, it can be loaded from ModelScope (via `ModelConfig(model_id="xxx/xxx")`) or from a local path (via `ModelConfig(path="xxx")`). Inputting `scale=0.8` increases image brightness. Note that in the code, input parameters for `pipe` must be transferred to `template_pipeline`, and `template_inputs` should be added. ```python # Load Template model @@ -37,7 +37,7 @@ template_pipeline = TemplatePipeline.from_pretrained( torch_dtype=torch.bfloat16, device="cuda", model_configs=[ - ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Template-Brightness") + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness") ], ) # Generate an image @@ -53,7 +53,7 @@ image.save("image_0.8.png") ## CFG Enhancement for Template Models -Template models can enable CFG (Classifier-Free Guidance) to make control effects more pronounced. 
For example, with the model [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness), adding `negative_template_inputs` to the TemplatePipeline input parameters and setting its scale to 0.5 will generate images with more noticeable brightness variations by contrasting both sides. +Template models can enable CFG (Classifier-Free Guidance) to make control effects more pronounced. For example, with the model [DiffSynth-Studio/Template-KleinBase4B-Brightness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness), adding `negative_template_inputs` to the TemplatePipeline input parameters and setting its scale to 0.5 will generate images with more noticeable brightness variations by contrasting both sides. ```python # Generate an image with CFG @@ -77,7 +77,7 @@ template_pipeline = TemplatePipeline.from_pretrained( torch_dtype=torch.bfloat16, device="cuda", model_configs=[ - ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Template-Brightness") + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness") ], lazy_loading=True, ) @@ -85,7 +85,7 @@ template_pipeline = TemplatePipeline.from_pretrained( The base model's Pipeline and Template Pipeline are completely independent and can enable VRAM management on demand. -When Template model outputs contain LoRA in Template Cache, you need to enable VRAM management for the base model's Pipeline or enable LoRA hot loading (using the code below), otherwise LoRA weights will be叠加. +When Template model outputs contain LoRA in Template Cache, you need to enable VRAM management for the base model's Pipeline or enable LoRA hot loading (using the code below), otherwise LoRA weights will be fused repeatedly. 
```python pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) @@ -100,6 +100,7 @@ After enabling VRAM management for the base model's Pipeline and lazy loading fo ```python from diffsynth.diffusion.template import TemplatePipeline from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from modelscope import dataset_snapshot_download import torch from PIL import Image @@ -137,6 +138,8 @@ template = TemplatePipeline.from_pretrained( ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness"), ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint"), ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ContentRef"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Age"), ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme"), ], ) @@ -154,7 +157,7 @@ image = template( template_inputs = [ { "model_id": 3, - "image": Image.open("data/assets/image_lowres_100.jpg"), + "image": Image.open("data/examples/templates/image_lowres_100.jpg"), "prompt": "A cat is sitting on a stone.", }, { @@ -165,7 +168,7 @@ image = template( negative_template_inputs = [ { "model_id": 3, - "image": Image.open("data/assets/image_lowres_100.jpg"), + "image": Image.open("data/examples/templates/image_lowres_100.jpg"), "prompt": "", }, { @@ -193,7 +196,7 @@ image = template( template_inputs = [ { "model_id": 1, - "image": Image.open("data/assets/image_depth.jpg"), + "image": Image.open("data/examples/templates/image_depth.jpg"), "prompt": "A cat is sitting on a stone, bathed in bright sunshine.", }, { @@ -210,7 +213,7 @@ image = template( negative_template_inputs = [ { "model_id": 1, - "image": Image.open("data/assets/image_depth.jpg"), + "image": Image.open("data/examples/templates/image_depth.jpg"), "prompt": "", }, { @@ -244,12 +247,12 @@ image = template( template_inputs = [ { "model_id": 1, - "image": 
Image.open("data/assets/image_depth.jpg"), + "image": Image.open("data/examples/templates/image_depth.jpg"), "prompt": "A cat is sitting on a stone. Colored ink painting.", }, { "model_id": 2, - "image": Image.open("data/assets/image_reference.jpg"), + "image": Image.open("data/examples/templates/image_reference.jpg"), "prompt": "Convert the image style to colored ink painting.", }, { @@ -262,12 +265,12 @@ image = template( negative_template_inputs = [ { "model_id": 1, - "image": Image.open("data/assets/image_depth.jpg"), + "image": Image.open("data/examples/templates/image_depth.jpg"), "prompt": "", }, { "model_id": 2, - "image": Image.open("data/assets/image_reference.jpg"), + "image": Image.open("data/examples/templates/image_reference.jpg"), "prompt": "", }, ], @@ -295,13 +298,13 @@ image = template( }, { "model_id": 2, - "image": Image.open("data/assets/image_reference.jpg"), + "image": Image.open("data/examples/templates/image_reference.jpg"), "prompt": "Convert the image style to flat anime style.", }, { "model_id": 6, - "image": Image.open("data/assets/image_reference.jpg"), - "mask": Image.open("data/assets/image_mask_1.jpg"), + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), "force_inpaint": True, }, ], @@ -312,13 +315,13 @@ image = template( }, { "model_id": 2, - "image": Image.open("data/assets/image_reference.jpg"), + "image": Image.open("data/examples/templates/image_reference.jpg"), "prompt": "", }, { "model_id": 6, - "image": Image.open("data/assets/image_reference.jpg"), - "mask": Image.open("data/assets/image_mask_1.jpg"), + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), }, ], ) diff --git a/docs/en/Diffusion_Templates/Template_Model_Training.md b/docs/en/Diffusion_Templates/Template_Model_Training.md index b32b69cea..485c29d29 100644 --- 
a/docs/en/Diffusion_Templates/Template_Model_Training.md +++ b/docs/en/Diffusion_Templates/Template_Model_Training.md @@ -101,7 +101,7 @@ TEMPLATE_MODEL_PATH = None To train Template models with DiffSynth-Studio, datasets should contain `template_inputs` fields in `metadata.json`. These fields pass through `TEMPLATE_DATA_PROCESSOR` to generate inputs for Template model methods. -For example, the brightness control model [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness) takes `scale` as input: +For example, the brightness control model [DiffSynth-Studio/Template-KleinBase4B-Brightness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness) takes `scale` as input: ```json [ diff --git a/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md b/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md index 21795e18c..e6ccbbc8c 100644 --- a/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md +++ b/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md @@ -20,6 +20,7 @@ Diffusion Templates 是 DiffSynth-Studio 中的 Diffusion 模型可控生成插 * 美学对齐:[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic) * 局部重绘:[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint) * 内容参考:[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef) + * 年龄控制:[DiffSynth-Studio/Template-KleinBase4B-Age](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Age) * 魔性熊猫(彩蛋模型):[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme) * 数据集:[合集](https://modelscope.cn/collections/DiffSynth-Studio/ImagePulseV2--shujuji) *
[DiffSynth-Studio/ImagePulseV2-Edit-Inpaint](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint) diff --git a/docs/zh/Diffusion_Templates/Template_Model_Inference.md b/docs/zh/Diffusion_Templates/Template_Model_Inference.md index 8fdd8e648..4ba299417 100644 --- a/docs/zh/Diffusion_Templates/Template_Model_Inference.md +++ b/docs/zh/Diffusion_Templates/Template_Model_Inference.md @@ -29,7 +29,7 @@ image = pipe( image.save("image.png") ``` -Template 模型 [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness) 可以控制模型生成图像的亮度。通过 `TemplatePipeline` 模型,可从魔搭模型库加载(`ModelConfig(model_id="xxx/xxx")`)或从本地路径加载(`ModelConfig(path="xxx")`)。输入 scale=0.8 提高图像的亮度。注意在代码中,需将 `pipe` 的输入参数转移到 `template_pipeline` 中,并添加 `template_inputs`。 +Template 模型 [DiffSynth-Studio/Template-KleinBase4B-Brightness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness) 可以控制模型生成图像的亮度。通过 `TemplatePipeline` 模型,可从魔搭模型库加载(`ModelConfig(model_id="xxx/xxx")`)或从本地路径加载(`ModelConfig(path="xxx")`)。输入 scale=0.8 提高图像的亮度。注意在代码中,需将 `pipe` 的输入参数转移到 `template_pipeline` 中,并添加 `template_inputs`。 ```python # Load Template model @@ -37,7 +37,7 @@ template_pipeline = TemplatePipeline.from_pretrained( torch_dtype=torch.bfloat16, device="cuda", model_configs=[ - ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Template-Brightness") + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness") ], ) # Generate an image @@ -53,7 +53,7 @@ image.save("image_0.8.png") ## Template 模型的 CFG 增强 -Template 模型可以开启 CFG(Classifier-Free Guidance),使其控制效果更明显。例如模型 [DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness),在 `TemplatePipeline` 的输入参数中添加 `negative_template_inputs` 并将其 scale 设置为 0.5,模型就会对比两侧的差异,生成亮度变化更明显的图像。 +Template 模型可以开启 CFG(Classifier-Free Guidance),使其控制效果更明显。例如模型 
[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness),在 `TemplatePipeline` 的输入参数中添加 `negative_template_inputs` 并将其 scale 设置为 0.5,模型就会对比两侧的差异,生成亮度变化更明显的图像。 ```python # Generate an image with CFG @@ -77,7 +77,7 @@ template_pipeline = TemplatePipeline.from_pretrained( torch_dtype=torch.bfloat16, device="cuda", model_configs=[ - ModelConfig(model_id="DiffSynth-Studio/F2KB4B-Template-Brightness") + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Brightness") ], lazy_loading=True, ) @@ -100,6 +100,7 @@ pipe.dit = pipe.enable_lora_hot_loading(pipe.dit) ```python from diffsynth.diffusion.template import TemplatePipeline from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig +from modelscope import dataset_snapshot_download import torch from PIL import Image @@ -137,6 +138,8 @@ template = TemplatePipeline.from_pretrained( ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Sharpness"), ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Inpaint"), ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Aesthetic"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-ContentRef"), + ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-Age"), ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme"), ], ) @@ -154,7 +157,7 @@ image = template( template_inputs = [ { "model_id": 3, - "image": Image.open("data/assets/image_lowres_100.jpg"), + "image": Image.open("data/examples/templates/image_lowres_100.jpg"), "prompt": "A cat is sitting on a stone.", }, { @@ -165,7 +168,7 @@ image = template( negative_template_inputs = [ { "model_id": 3, - "image": Image.open("data/assets/image_lowres_100.jpg"), + "image": Image.open("data/examples/templates/image_lowres_100.jpg"), "prompt": "", }, { @@ -193,7 +196,7 @@ image = template( template_inputs = [ { "model_id": 1, - "image": Image.open("data/assets/image_depth.jpg"), + "image": 
Image.open("data/examples/templates/image_depth.jpg"), "prompt": "A cat is sitting on a stone, bathed in bright sunshine.", }, { @@ -210,7 +213,7 @@ image = template( negative_template_inputs = [ { "model_id": 1, - "image": Image.open("data/assets/image_depth.jpg"), + "image": Image.open("data/examples/templates/image_depth.jpg"), "prompt": "", }, { @@ -244,12 +247,12 @@ image = template( template_inputs = [ { "model_id": 1, - "image": Image.open("data/assets/image_depth.jpg"), + "image": Image.open("data/examples/templates/image_depth.jpg"), "prompt": "A cat is sitting on a stone. Colored ink painting.", }, { "model_id": 2, - "image": Image.open("data/assets/image_reference.jpg"), + "image": Image.open("data/examples/templates/image_reference.jpg"), "prompt": "Convert the image style to colored ink painting.", }, { @@ -262,12 +265,12 @@ image = template( negative_template_inputs = [ { "model_id": 1, - "image": Image.open("data/assets/image_depth.jpg"), + "image": Image.open("data/examples/templates/image_depth.jpg"), "prompt": "", }, { "model_id": 2, - "image": Image.open("data/assets/image_reference.jpg"), + "image": Image.open("data/examples/templates/image_reference.jpg"), "prompt": "", }, ], @@ -295,13 +298,13 @@ image = template( }, { "model_id": 2, - "image": Image.open("data/assets/image_reference.jpg"), + "image": Image.open("data/examples/templates/image_reference.jpg"), "prompt": "Convert the image style to flat anime style.", }, { "model_id": 6, - "image": Image.open("data/assets/image_reference.jpg"), - "mask": Image.open("data/assets/image_mask_1.jpg"), + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), "force_inpaint": True, }, ], @@ -312,13 +315,13 @@ image = template( }, { "model_id": 2, - "image": Image.open("data/assets/image_reference.jpg"), + "image": Image.open("data/examples/templates/image_reference.jpg"), "prompt": "", }, { "model_id": 6, - "image": 
Image.open("data/assets/image_reference.jpg"), - "mask": Image.open("data/assets/image_mask_1.jpg"), + "image": Image.open("data/examples/templates/image_reference.jpg"), + "mask": Image.open("data/examples/templates/image_mask_1.jpg"), }, ], ) diff --git a/docs/zh/Diffusion_Templates/Template_Model_Training.md b/docs/zh/Diffusion_Templates/Template_Model_Training.md index b3726ab42..3f3200b28 100644 --- a/docs/zh/Diffusion_Templates/Template_Model_Training.md +++ b/docs/zh/Diffusion_Templates/Template_Model_Training.md @@ -101,7 +101,7 @@ TEMPLATE_MODEL_PATH = None 如需使用 DiffSynth-Studio 训练 Template 模型,则需构建训练数据集,数据集中的 `metadata.json` 包含 `template_inputs` 字段。`metadata.json` 中的 `template_inputs` 并不是直接输入给 Template 模型 `process_inputs` 的参数,而是提供给 `TEMPLATE_DATA_PROCESSOR` 的输入参数,由 `TEMPLATE_DATA_PROCESSOR` 计算出输入给 Template 模型 `process_inputs` 的参数。 -例如,[DiffSynth-Studio/F2KB4B-Template-Brightness](https://modelscope.cn/models/DiffSynth-Studio/F2KB4B-Template-Brightness) 这一亮度控制模型的输入参数是 `scale`,即图像的亮度数值。`scale` 可以直接写在 `metadata.json` 中,此时 `TEMPLATE_DATA_PROCESSOR` 只需要传递参数: +例如,[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness) 这一亮度控制模型的输入参数是 `scale`,即图像的亮度数值。`scale` 可以直接写在 `metadata.json` 中,此时 `TEMPLATE_DATA_PROCESSOR` 只需要传递参数: ```json [ From eb208fc52da7952e451ac0cc0014e184fa98e690 Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Tue, 21 Apr 2026 21:29:45 +0800 Subject: [PATCH 10/12] update docs --- docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md | 1 + docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md b/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md index b4b0afdb0..a1ce2d5ee 100644 --- a/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md +++ b/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md @@ -9,6 +9,7 @@ 
Diffusion Templates is a controllable generation plugin framework for Diffusion * Diffusion Templates Architecture Details: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html) * Template Model Inference: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Inference.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Inference.html) * Template Model Training: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Training.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Training.html) + * For template training code of the FLUX.2 series models, please refer to [FLUX.2](../Model_Details/FLUX2.md#model-overview). 
* Online Demo: [ModelScope Creative Space](https://modelscope.cn/studios/DiffSynth-Studio/Diffusion-Templates) * Models: [Collection](https://modelscope.cn/collections/DiffSynth-Studio/KleinBase4B-Templates) * Structure Control: [DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) diff --git a/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md b/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md index e6ccbbc8c..8837c5b1d 100644 --- a/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md +++ b/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md @@ -9,6 +9,7 @@ Diffusion Templates 是 DiffSynth-Studio 中的 Diffusion 模型可控生成插 * Diffusion Templates 架构详解:[English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html) * Template 模型推理:[English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Inference.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Inference.html) * Template 模型训练:[English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Training.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Training.html) + * FLUX.2 系列模型的 Templates 训练样例代码请参考 [FLUX.2](../Model_Details/FLUX2.md#模型总览) * 在线体验:[魔搭社区创空间](https://modelscope.cn/studios/DiffSynth-Studio/Diffusion-Templates) * 模型:[合集](https://modelscope.cn/collections/DiffSynth-Studio/KleinBase4B-Templates) * 结构控制:[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) From 13eb32980ab7634160cf5dfd30072891c98c8059 Mon Sep 17 00:00:00 2001 From: 
Artiprocher Date: Tue, 21 Apr 2026 21:30:24 +0800 Subject: [PATCH 11/12] update docs --- docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md | 2 +- docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md b/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md index a1ce2d5ee..1e32b9b00 100644 --- a/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md +++ b/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md @@ -23,7 +23,7 @@ Diffusion Templates is a controllable generation plugin framework for Diffusion * Content Reference: [DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef) * Age Control: [DiffSynth-Studio/Template-KleinBase4B-Age](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Age) * Panda Meme (Easter Egg Model): [DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme) -* Datasets: [Collection](https://modelscope.cn/collections/DiffSynth-Studio/ImagePulseV2--shujuji) +* Datasets: [Collection](https://modelscope.cn/collections/DiffSynth-Studio/ImagePulseV2) * [DiffSynth-Studio/ImagePulseV2-Edit-Inpaint](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint) * [DiffSynth-Studio/ImagePulseV2-TextImage](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage) * [DiffSynth-Studio/ImagePulseV2-Edit-Background](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Background) diff --git a/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md b/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md index 8837c5b1d..070335846 100644 --- a/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md +++
b/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md @@ -23,7 +23,7 @@ Diffusion Templates 是 DiffSynth-Studio 中的 Diffusion 模型可控生成插 * 内容参考:[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef) * 年龄控制:[DiffSynth-Studio/Template-KleinBase4B-Age](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Age) * 魔性熊猫(彩蛋模型):[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme) -* 数据集:[合集](https://modelscope.cn/collections/DiffSynth-Studio/ImagePulseV2--shujuji) +* 数据集:[合集](https://modelscope.cn/collections/DiffSynth-Studio/ImagePulseV2) * [DiffSynth-Studio/ImagePulseV2-Edit-Inpaint](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint) * [DiffSynth-Studio/ImagePulseV2-TextImage](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage) * [DiffSynth-Studio/ImagePulseV2-Edit-Background](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Background) From d90e036865efb1e618fbdde8c0ec8184c08316fe Mon Sep 17 00:00:00 2001 From: Artiprocher Date: Thu, 23 Apr 2026 13:53:11 +0800 Subject: [PATCH 12/12] update docs --- .../Introducing_Diffusion_Templates.md | 96 ++++++++++--------- .../Introducing_Diffusion_Templates.md | 69 ++++++------- 2 files changed, 88 insertions(+), 77 deletions(-) diff --git a/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md b/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md index 1e32b9b00..3c628dc0f 100644 --- a/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md +++ b/docs/en/Diffusion_Templates/Introducing_Diffusion_Templates.md @@ -2,68 +2,74 @@ Diffusion Templates is a controllable generation plugin framework for Diffusion models in DiffSynth-Studio, providing additional controllable generation capabilities for base models.
-* Open Source Code: [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio) -* Technical Report: coming soon -* Documentation Reference - * Introducing Diffusion Templates: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Introducing_Diffusion_Templates.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Introducing_Diffusion_Templates.html) - * Diffusion Templates Architecture Details: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html) - * Template Model Inference: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Inference.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Inference.html) - * Template Model Training: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Training.html), [中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Training.html) - * For template training code of the FLUX.2 series models, please refer to [FLUX.2](../Model_Details/FLUX2.md#model-overview). 
-* Online Demo: [ModelScope Creative Space](https://modelscope.cn/studios/DiffSynth-Studio/Diffusion-Templates) -* Models: [Collection](https://modelscope.cn/collections/DiffSynth-Studio/KleinBase4B-Templates) - * Structure Control: [DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) - * Brightness Adjustment: [DiffSynth-Studio/Template-KleinBase4B-Brightness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness) - * Color Adjustment: [DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB) - * Image Editing: [DiffSynth-Studio/Template-KleinBase4B-Edit](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit) - * Super Resolution: [DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler) - * Sharpness Enhancement: [DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness) - * Aesthetic Alignment: [DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic) - * Inpainting: [DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint) - * Content Reference: [DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef) - * Age Control: [DiffSynth-Studio/Template-KleinBase4B-Age](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Age) - * Panda Meme (Easter Egg Model): [DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme) -* Datasets: [Collection](https://modelscope.cn/collections/DiffSynth-Studio/ImagePulseV2) - *
[DiffSynth-Studio/ImagePulseV2-Edit-Inpaint](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint) - * [DiffSynth-Studio/ImagePulseV2-TextImage](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage) - * [DiffSynth-Studio/ImagePulseV2-Edit-Background](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Background) - * [DiffSynth-Studio/ImagePulseV2-Edit-Clothes](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Clothes) - * [DiffSynth-Studio/ImagePulseV2-Edit-Pose](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Pose) - * [DiffSynth-Studio/ImagePulseV2-Edit-Change](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Change) - * [DiffSynth-Studio/ImagePulseV2-Edit-AddRemove](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-AddRemove) - * [DiffSynth-Studio/ImagePulseV2-Edit-Upscale](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Upscale) - * [DiffSynth-Studio/ImagePulseV2-TextImage-Human](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-Human) - * [DiffSynth-Studio/ImagePulseV2-Edit-Crop](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Crop) - * [DiffSynth-Studio/ImagePulseV2-Edit-Light](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Light) - * [DiffSynth-Studio/ImagePulseV2-Edit-Structure](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Structure) - * [DiffSynth-Studio/ImagePulseV2-Edit-HumanFace](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-HumanFace) - * [DiffSynth-Studio/ImagePulseV2-Edit-Angle](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Angle) - * [DiffSynth-Studio/ImagePulseV2-Edit-Style](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Style) - * [DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution) - 
* [DiffSynth-Studio/ImagePulseV2-Edit-Merge](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Merge) +* Open-source code: [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio) +* Technical report: coming soon +* Documentation reference + * Introduction to Diffusion Templates: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Introducing_Diffusion_Templates.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Introducing_Diffusion_Templates.html) + * Detailed Architecture of Diffusion Templates: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html) + * Template Model Inference: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Inference.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Inference.html) + * Template Model Training: [English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Training.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Training.html) +* Online experience: [ModelScope Studio](https://modelscope.cn/studios/DiffSynth-Studio/Diffusion-Templates) +* Model collection: [ModelScope](https://modelscope.cn/collections/DiffSynth-Studio/KleinBase4B-Templates)、[ModelScope International](https://modelscope.ai/collections/DiffSynth-Studio/KleinBase4B-Templates)、[HuggingFace](https://huggingface.co/collections/DiffSynth-Studio/kleinbase4b-templates) -## Model Gallery +|Model Name|ModelScope|ModelScope International|HuggingFace|Inference Code|Low VRAM Inference Code|Training Code|Training Validation Code| +|-|-|-|-|-|-|-|-| +|Structure 
Control|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)| +|Brightness Adjustment|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)| +|Color 
Adjustment|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)| +|Image Editing|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)| 
+|Super-Resolution|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)| +|Sharpness Enhancement|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)| +|Aesthetic 
Alignment|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)| +|Local Redrawing|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)| +|Content 
Reference|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)| +|Age Control|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Age)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-Age)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-Age)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Age.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Age.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py)| +|Panda Meme (Easter Egg 
Model)|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py)| -* Super Resolution + Sharpness Enhancement: Generate ultra-high-clarity images +* Dataset: [ModelScope](https://modelscope.cn/collections/DiffSynth-Studio/ImagePulseV2)、[ModelScope International](https://modelscope.ai/collections/DiffSynth-Studio/ImagePulseV2)、[HuggingFace](https://huggingface.co/collections/DiffSynth-Studio/imagepulsev2) + +|Dataset Name|ModelScope|ModelScope International|HuggingFace| +|-|-|-|-| +|Text-to-Image|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-TextImage)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-TextImage)| +|Local Redrawing|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint)| +|Background 
Replacement|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Background)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Background)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Background)| +|Clothing Replacement|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Clothes)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Clothes)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Clothes)| +|Pose Adjustment|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Pose)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Pose)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Pose)| +|Foreground Modification|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Change)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Change)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Change)| +|Local Addition/Removal|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-AddRemove)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-AddRemove)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-AddRemove)| +|Super-Resolution|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Upscale)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Upscale)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Upscale)| +|Portrait Generation|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-Human)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-Human)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-Human)| +|Random 
Cropping|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Crop)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Crop)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Crop)| +|Lighting Adjustment|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Light)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Light)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Light)| +|Scene Structure|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Structure)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Structure)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Structure)| +|Facial Expression Editing|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-HumanFace)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-HumanFace)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-HumanFace)| +|View Angle Adjustment|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Angle)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Angle)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Angle)| +|Style Transfer|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Style)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Style)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Style)| +|Multi-Resolution|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution)| +|Multi-Image 
Merge|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Merge)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Merge)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Merge)| + +## Model Performance Overview + +* Super-Resolution + Sharpness Enhancement: Generate ultra-high-resolution images |Low Resolution Input|High Resolution Output| |-|-| |![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_lowres_100.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Upscaler_Sharpness.png)| -* Structure Control + Aesthetic Alignment + Sharpness Enhancement: Fully-armed ControlNet +* Structure Control + Aesthetic Alignment + Sharpness Enhancement: Fully-equipped ControlNet |Structure Control Image|Output Image| |-|-| |![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Aesthetic_Sharpness.png)| -* Structure Control + Image Editing + Color Adjustment: Artistic style creation at will +* Structure Control + Image Editing + Color Adjustment: Artistic Style Creation at Will |Structure Control Image|Editing Input Image|Output Image| |-|-|-| |![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_depth.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Controlnet_Edit_SoftRGB.png)| -* Brightness Control + Image Editing + Inpainting: Transport elements across dimensions +* Brightness Control + Image Editing + Local Redrawing: Cross-dimensional Elements in Images -|Reference Image|Inpaint Region|Output 
Image| +|Reference Image|Redrawing Area|Output Image| |-|-|-| |![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_mask_1.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Brightness_Edit_Inpaint.png)| diff --git a/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md b/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md index 070335846..85d711b5b 100644 --- a/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md +++ b/docs/zh/Diffusion_Templates/Introducing_Diffusion_Templates.md @@ -9,38 +9,44 @@ Diffusion Templates 是 DiffSynth-Studio 中的 Diffusion 模型可控生成插 * Diffusion Templates 架构详解:[English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Understanding_Diffusion_Templates.html) * Template 模型推理:[English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Inference.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Inference.html) * Template 模型训练:[English Version](https://diffsynth-studio-doc.readthedocs.io/en/latest/Diffusion_Templates/Template_Model_Training.html)、[中文版](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/Diffusion_Templates/Template_Model_Training.html) - * FLUX.2 系列模型的 Templates 训练样例代码请参考 [FLUX.2](../Model_Details/FLUX2.md#模型总览) * 在线体验:[魔搭社区创空间](https://modelscope.cn/studios/DiffSynth-Studio/Diffusion-Templates) -* 模型:[合集](https://modelscope.cn/collections/DiffSynth-Studio/KleinBase4B-Templates) - * 结构控制:[DiffSynth-Studio/Template-KleinBase4B-ControlNet](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet) 
- * 亮度调节:[DiffSynth-Studio/Template-KleinBase4B-Brightness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness) - * 色彩调节:[DiffSynth-Studio/Template-KleinBase4B-SoftRGB](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB) - * 图像编辑:[DiffSynth-Studio/Template-KleinBase4B-Edit](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit) - * 超分辨率:[DiffSynth-Studio/Template-KleinBase4B-Upscaler](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler) - * 锐利激发:[DiffSynth-Studio/Template-KleinBase4B-Sharpness](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness) - * 美学对齐:[DiffSynth-Studio/Template-KleinBase4B-Aesthetic](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic) - * 局部重绘:[DiffSynth-Studio/Template-KleinBase4B-Inpaint](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint) - * 内容参考:[DiffSynth-Studio/Template-KleinBase4B-ContentRef](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef) - * 年龄控制:[DiffSynth-Studio/Template-KleinBase4B-Age](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef) - * 魔性熊猫(彩蛋模型):[DiffSynth-Studio/Template-KleinBase4B-PandaMeme](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme) -* 数据集:[合集](https://modelscope.cn/collections/DiffSynth-Studio/ImagePulseV2) - * [DiffSynth-Studio/ImagePulseV2-Edit-Inpaint](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint) - * [DiffSynth-Studio/ImagePulseV2-TextImage](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage) - * [DiffSynth-Studio/ImagePulseV2-Edit-Background](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Background) - * [DiffSynth-Studio/ImagePulseV2-Edit-Clothes](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Clothes) - * 
[DiffSynth-Studio/ImagePulseV2-Edit-Pose](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Pose) - * [DiffSynth-Studio/ImagePulseV2-Edit-Change](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Change) - * [DiffSynth-Studio/ImagePulseV2-Edit-AddRemove](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-AddRemove) - * [DiffSynth-Studio/ImagePulseV2-Edit-Upscale](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Upscale) - * [DiffSynth-Studio/ImagePulseV2-TextImage-Human](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-Human) - * [DiffSynth-Studio/ImagePulseV2-Edit-Crop](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Crop) - * [DiffSynth-Studio/ImagePulseV2-Edit-Light](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Light) - * [DiffSynth-Studio/ImagePulseV2-Edit-Structure](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Structure) - * [DiffSynth-Studio/ImagePulseV2-Edit-HumanFace](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-HumanFace) - * [DiffSynth-Studio/ImagePulseV2-Edit-Angle](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Angle) - * [DiffSynth-Studio/ImagePulseV2-Edit-Style](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Style) - * [DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution) - * [DiffSynth-Studio/ImagePulseV2-Edit-Merge](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Merge) +* 模型集:[ModelScope](https://modelscope.cn/collections/DiffSynth-Studio/KleinBase4B-Templates)、[ModelScope 国际站](https://modelscope.ai/collections/DiffSynth-Studio/KleinBase4B-Templates)、[HuggingFace](https://huggingface.co/collections/DiffSynth-Studio/kleinbase4b-templates) + +|模型名称|ModelScope|ModelScope 国际站|HuggingFace|推理代码|低显存推理代码|训练代码|训练效果验证代码| +|-|-|-|-|-|-|-|-| 
+|结构控制|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-ControlNet)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ControlNet.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ControlNet.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ControlNet.py)| +|亮度调节|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-Brightness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Brightness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Brightness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Brightness.py)| 
+|色彩调节|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-SoftRGB)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-SoftRGB.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-SoftRGB.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-SoftRGB.py)| +|图像编辑|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-Edit)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-Edit)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Edit.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Edit.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Edit.py)| 
+|超分辨率|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-Upscaler)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Upscaler.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Upscaler.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Upscaler.py)| +|锐利激发|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-Sharpness)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Sharpness.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Sharpness.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Sharpness.py)| 
+|美学对齐|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-Aesthetic)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Aesthetic.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Aesthetic.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Aesthetic.py)| +|局部重绘|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-Inpaint)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Inpaint.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Inpaint.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Inpaint.py)| 
+|内容参考|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-ContentRef)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-ContentRef.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-ContentRef.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-ContentRef.py)| +|年龄控制|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-Age)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-Age)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-Age)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-Age.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-Age.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-Age.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-Age.py)| 
+|魔性熊猫(彩蛋模型)|[link](https://modelscope.cn/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[link](https://modelscope.ai/models/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[link](https://huggingface.co/DiffSynth-Studio/Template-KleinBase4B-PandaMeme)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_inference_low_vram/Template-KleinBase4B-PandaMeme.py)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/full/Template-KleinBase4B-PandaMeme.sh)|[code](https://github.com/modelscope/DiffSynth-Studio/blob/main/examples/flux2/model_training/validate_full/Template-KleinBase4B-PandaMeme.py)| + +* 数据集:[ModelScope](https://modelscope.cn/collections/DiffSynth-Studio/ImagePulseV2)、[ModelScope 国际站](https://modelscope.ai/collections/DiffSynth-Studio/ImagePulseV2)、[HuggingFace](https://huggingface.co/collections/DiffSynth-Studio/imagepulsev2) + +|数据集名称|ModelScope|ModelScope 国际站|HuggingFace| +|-|-|-|-| +|文生图|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-TextImage)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-TextImage)| +|局部重绘|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Inpaint)| +|背景替换|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Background)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Background)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Background)| 
+|服装替换|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Clothes)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Clothes)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Clothes)| +|姿态调整|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Pose)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Pose)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Pose)| +|前景修改|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Change)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Change)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Change)| +|局部增删|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-AddRemove)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-AddRemove)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-AddRemove)| +|超分辨率|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Upscale)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Upscale)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Upscale)| +|人物特写|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-Human)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-Human)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-Human)| +|随机缩放|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Crop)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Crop)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Crop)| 
+|光照调整|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Light)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Light)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Light)| +|画面结构|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Structure)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Structure)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Structure)| +|表情编辑|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-HumanFace)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-HumanFace)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-HumanFace)| +|视角调整|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Angle)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Angle)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Angle)| +|风格迁移|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Style)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Style)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Style)| +|多分辨率|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-TextImage-MultiResolution)| +|多图合并|[link](https://modelscope.cn/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Merge)|[link](https://modelscope.ai/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Merge)|[link](https://huggingface.co/datasets/DiffSynth-Studio/ImagePulseV2-Edit-Merge)| ## 模型效果一览 @@ -67,4 +73,3 @@ Diffusion Templates 是 DiffSynth-Studio 中的 Diffusion 模型可控生成插 |参考图|重绘区域|输出图| |-|-|-| 
|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_reference.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_mask_1.jpg)|![](https://modelscope.cn/datasets/DiffSynth-Studio/examples_in_diffsynth/resolve/master/templates/image_Brightness_Edit_Inpaint.png)| -