From 31487609436eeb9be101edf5f2c9840d450142d7 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 30 Jun 2026 08:29:51 +0000 Subject: [PATCH 1/6] Support root-hosted weights + add _allow_files download hook is_safetensors_compatible: filter to weight files up front so config-only folders (e.g. scheduler/) no longer form spurious components. A pipeline whose weights live at the repo root (rather than in component subfolders) is now correctly detected as safetensors-compatible, instead of falling back to .bin and dropping the root .safetensors. _allow_files: optional list of exact filenames in model_index.json that are added to the download set, for repos that keep a component's config/tokenizer files at the root where the folder-based allow patterns would miss them. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/diffusers/pipelines/pipeline_loading_utils.py | 2 ++ src/diffusers/pipelines/pipeline_utils.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/src/diffusers/pipelines/pipeline_loading_utils.py b/src/diffusers/pipelines/pipeline_loading_utils.py index d695f5e7284d..f1e4ea06dc72 100644 --- a/src/diffusers/pipelines/pipeline_loading_utils.py +++ b/src/diffusers/pipelines/pipeline_loading_utils.py @@ -136,6 +136,8 @@ def is_safetensors_compatible(filenames, passed_components=None, folder_names=No ) passed_components = passed_components or [] + # only weight files matter for safetensors compatibility + filenames = filter_model_files(filenames) if folder_names: filenames = {f for f in filenames if os.path.split(f)[0] in folder_names} diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 1fa4db90d995..b47dae7fb382 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -1658,6 +1658,7 @@ def download(cls, pretrained_model_name, **kwargs) -> str | os.PathLike: ) config_dict = cls._dict_from_json_file(config_file) ignore_filenames = config_dict.pop("_ignore_files", []) + allow_filenames = config_dict.pop("_allow_files", []) filenames = {sibling.rfilename for sibling in info.siblings} if variant is not None and _check_legacy_sharding_variant_format(filenames=filenames, variant=variant): @@ -1751,6 +1752,11 @@ def download(cls, pretrained_model_name, **kwargs) -> str | os.PathLike: p for p in allow_patterns if not (len(p.split("/")) == 2 and p.split("/")[0] in passed_components) ] + # Files explicitly allow-listed by the repo author via `_allow_files` in `model_index.json` are + # added to the download set. This supports repos that keep a component's config/tokenizer files at + # the root (instead of in its own subfolder), where the folder-based allow patterns would miss them. + allow_patterns += allow_filenames + if pipeline_class._load_connected_pipes: allow_patterns.append("README.md") From 38b7ab0084a922920e5bed21d444a8089c0d7ecf Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 30 Jun 2026 18:44:26 +0000 Subject: [PATCH 2/6] Add is_safetensors_compatible tests for root-hosted weights Cover root-level safetensors weights and the flat layout where weights live at the root alongside a weight-less subfolder (e.g. scheduler/), which must not prevent the root safetensors from being recognized. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/pipelines/test_pipeline_utils.py | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/pipelines/test_pipeline_utils.py b/tests/pipelines/test_pipeline_utils.py index 6d9e68197976..3960e3b173d9 100644 --- a/tests/pipelines/test_pipeline_utils.py +++ b/tests/pipelines/test_pipeline_utils.py @@ -218,6 +218,34 @@ def test_diffusers_is_compatible_no_components_only_variants(self): ] self.assertFalse(is_safetensors_compatible(filenames)) + def test_diffusers_is_compatible_no_components_safetensors(self): + filenames = [ + "diffusion_pytorch_model.safetensors", + ] + self.assertTrue(is_safetensors_compatible(filenames)) + + def test_diffusers_is_compatible_no_components_safetensors_only_variants(self): + filenames = [ + "diffusion_pytorch_model.fp16.safetensors", + ] + self.assertTrue(is_safetensors_compatible(filenames, variant="fp16")) + + def test_diffusers_is_compatible_weightless_subfolder(self): + # transformers-style flat layout: safetensors weights at the root + a weight-less subfolder (scheduler/) + filenames = [ + "diffusion_pytorch_model.safetensors", + "scheduler/scheduler_config.json", + ] + self.assertTrue(is_safetensors_compatible(filenames)) + + def test_diffusers_is_not_compatible_weightless_subfolder(self): + # same flat layout but only .bin weights at the root -> not safetensors compatible + filenames = [ + "diffusion_pytorch_model.bin", + "scheduler/scheduler_config.json", + ] + self.assertFalse(is_safetensors_compatible(filenames)) + def test_is_compatible_mixed_variants(self): filenames = [ "unet/diffusion_pytorch_model.fp16.safetensors", From a7be96662c6e88ea1360213cfc956654f62f1d22 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Tue, 30 Jun 2026 19:35:08 +0000 Subject: [PATCH 3/6] Add download test for _allow_files Mirrors test_smart_download against a unet-pipeline-dummy clone that lists big_array.npy in _allow_files, asserting it is downloaded (whereas test_smart_download asserts it is skipped without _allow_files). Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/pipelines/test_pipelines.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py index 1df2cfa569e7..1deac9c8d7d4 100644 --- a/tests/pipelines/test_pipelines.py +++ b/tests/pipelines/test_pipelines.py @@ -2083,6 +2083,19 @@ def test_smart_download(self): # is not downloaded, but all the expected ones assert not os.path.isfile(os.path.join(snapshot_dir, "big_array.npy")) + def test_download_allow_files(self): + # `_allow_files` in model_index.json forces files that the smart-download patterns would otherwise skip. + # This repo mirrors `unet-pipeline-dummy` but lists a (tiny stand-in) `big_array.npy` in `_allow_files`, + # so it is downloaded here, whereas `test_smart_download` asserts it is skipped without `_allow_files`. + model_id = "hf-internal-testing/unet-pipeline-dummy-allow-files" + with tempfile.TemporaryDirectory() as tmpdirname: + _ = DiffusionPipeline.from_pretrained(model_id, cache_dir=tmpdirname, force_download=True) + local_repo_name = "--".join(["models"] + model_id.split("/")) + snapshot_dir = os.path.join(tmpdirname, local_repo_name, "snapshots") + snapshot_dir = os.path.join(snapshot_dir, os.listdir(snapshot_dir)[0]) + + assert os.path.isfile(os.path.join(snapshot_dir, "big_array.npy")) + def test_warning_unused_kwargs(self): model_id = "hf-internal-testing/unet-pipeline-dummy" logger = logging.get_logger("diffusers.pipelines") From 1fd3f0fba52ed80329df529dcfabacabada21ca7 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 2 Jul 2026 16:53:02 +0000 Subject: [PATCH 4/6] Replace _allow_files with fixed transformers aux-file allow patterns Per review feedback: an author-controlled _allow_files field in model_index.json would let a repo owner download arbitrary files onto a user's machine. Instead, always allow a fixed diffusers-owned list of known transformers auxiliary filenames (tokenizer/processor/chat-template/generation-config files) so flat, transformers-style repos work without an arbitrary-download hook. Co-Authored-By: Claude Fable 5 --- .../pipelines/pipeline_loading_utils.py | 21 ++++++++++++++ src/diffusers/pipelines/pipeline_utils.py | 10 +++---- tests/pipelines/test_pipelines.py | 29 ++++++++++++------- 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/src/diffusers/pipelines/pipeline_loading_utils.py b/src/diffusers/pipelines/pipeline_loading_utils.py index f1e4ea06dc72..3e85c29d0cea 100644 --- a/src/diffusers/pipelines/pipeline_loading_utils.py +++ b/src/diffusers/pipelines/pipeline_loading_utils.py @@ -69,6 +69,27 @@ TRANSFORMERS_DUMMY_MODULES_FOLDER = "transformers.utils" CONNECTED_PIPES_KEYS = ["prior"] +# Auxiliary (non-weight) files a transformers component saves next to its weights. Repos with a flat, +# transformers-style layout host a component's files at the repo root instead of in a subfolder, where the +# folder-based allow patterns of `DiffusionPipeline.download` would miss them. Root-hosted weights and +# `config.json` are matched by their own patterns, so only these auxiliary filenames need listing. +TRANSFORMERS_COMPONENT_AUX_FILES = [ + "added_tokens.json", + "chat_template.jinja", + "chat_template.json", + "generation_config.json", + "merges.txt", + "preprocessor_config.json", + "processor_config.json", + "special_tokens_map.json", + "spiece.model", + "tokenizer.json", + "tokenizer.model", + "tokenizer_config.json", + "vocab.json", + "vocab.txt", +] + logger = logging.get_logger(__name__) LOADABLE_CLASSES = { diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index b47dae7fb382..a3ef2260751f 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -82,6 +82,7 @@ CONNECTED_PIPES_KEYS, CUSTOM_PIPELINE_FILE_NAME, LOADABLE_CLASSES, + TRANSFORMERS_COMPONENT_AUX_FILES, _download_dduf_file, _fetch_class_library_tuple, _get_custom_components_and_folders, @@ -1658,7 +1659,6 @@ def download(cls, pretrained_model_name, **kwargs) -> str | os.PathLike: ) config_dict = cls._dict_from_json_file(config_file) ignore_filenames = config_dict.pop("_ignore_files", []) - allow_filenames = config_dict.pop("_allow_files", []) filenames = {sibling.rfilename for sibling in info.siblings} if variant is not None and _check_legacy_sharding_variant_format(filenames=filenames, variant=variant): @@ -1752,10 +1752,10 @@ def download(cls, pretrained_model_name, **kwargs) -> str | os.PathLike: p for p in allow_patterns if not (len(p.split("/")) == 2 and p.split("/")[0] in passed_components) ] - # Files explicitly allow-listed by the repo author via `_allow_files` in `model_index.json` are - # added to the download set. This supports repos that keep a component's config/tokenizer files at - # the root (instead of in its own subfolder), where the folder-based allow patterns would miss them. - allow_patterns += allow_filenames + # Repos with a flat, transformers-style layout host a component's files at the repo root instead of + # in a subfolder, where the folder-based allow patterns above miss its auxiliary files (root-hosted + # weights are already included via `model_filenames`, root `config.json` via `CONFIG_NAME`). + allow_patterns += TRANSFORMERS_COMPONENT_AUX_FILES if pipeline_class._load_connected_pipes: allow_patterns.append("README.md") diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py index 1deac9c8d7d4..5092a5f86b3b 100644 --- a/tests/pipelines/test_pipelines.py +++ b/tests/pipelines/test_pipelines.py @@ -2083,18 +2083,27 @@ def test_smart_download(self): # is not downloaded, but all the expected ones assert not os.path.isfile(os.path.join(snapshot_dir, "big_array.npy")) - def test_download_allow_files(self): - # `_allow_files` in model_index.json forces files that the smart-download patterns would otherwise skip. - # This repo mirrors `unet-pipeline-dummy` but lists a (tiny stand-in) `big_array.npy` in `_allow_files`, - # so it is downloaded here, whereas `test_smart_download` asserts it is skipped without `_allow_files`. - model_id = "hf-internal-testing/unet-pipeline-dummy-allow-files" + def test_download_flat_transformers_style_repo(self): + # Repos with a flat, transformers-style layout host a component's files at the repo root instead of in a + # subfolder (here `model` and `processor`; only `scheduler/` has a folder). The download patterns must + # pick up the transformers auxiliary files at the root, while unrelated root files are still skipped. + model_id = "hf-internal-testing/tiny-flat-transformers-style-pipe" with tempfile.TemporaryDirectory() as tmpdirname: - _ = DiffusionPipeline.from_pretrained(model_id, cache_dir=tmpdirname, force_download=True) - local_repo_name = "--".join(["models"] + model_id.split("/")) - snapshot_dir = os.path.join(tmpdirname, local_repo_name, "snapshots") - snapshot_dir = os.path.join(snapshot_dir, os.listdir(snapshot_dir)[0]) + snapshot_dir = DiffusionPipeline.download(model_id, cache_dir=tmpdirname, force_download=True) - assert os.path.isfile(os.path.join(snapshot_dir, "big_array.npy")) + assert os.path.isfile(os.path.join(snapshot_dir, "model.safetensors")) + assert os.path.isfile(os.path.join(snapshot_dir, CONFIG_NAME)) + for aux_file in [ + "tokenizer.json", + "tokenizer_config.json", + "processor_config.json", + "chat_template.jinja", + "generation_config.json", + ]: + assert os.path.isfile(os.path.join(snapshot_dir, aux_file)) + assert os.path.isfile(os.path.join(snapshot_dir, "scheduler", SCHEDULER_CONFIG_NAME)) + # unrelated root files are still not downloaded + assert not os.path.isfile(os.path.join(snapshot_dir, "big_array.npy")) def test_warning_unused_kwargs(self): model_id = "hf-internal-testing/unet-pipeline-dummy" From d4ef74b78256f80cf2de68d94e4e789a98e5f454 Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 2 Jul 2026 16:59:07 +0000 Subject: [PATCH 5/6] Limit aux file list to what DiffusionGemma needs Co-Authored-By: Claude Fable 5 --- src/diffusers/pipelines/pipeline_loading_utils.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/diffusers/pipelines/pipeline_loading_utils.py b/src/diffusers/pipelines/pipeline_loading_utils.py index 3e85c29d0cea..90cbffc5b69d 100644 --- a/src/diffusers/pipelines/pipeline_loading_utils.py +++ b/src/diffusers/pipelines/pipeline_loading_utils.py @@ -73,21 +73,13 @@ # transformers-style layout host a component's files at the repo root instead of in a subfolder, where the # folder-based allow patterns of `DiffusionPipeline.download` would miss them. Root-hosted weights and # `config.json` are matched by their own patterns, so only these auxiliary filenames need listing. +# Currently the set needed by DiffusionGemma — extend as new flat-layout pipelines require it. TRANSFORMERS_COMPONENT_AUX_FILES = [ - "added_tokens.json", "chat_template.jinja", - "chat_template.json", "generation_config.json", - "merges.txt", - "preprocessor_config.json", "processor_config.json", - "special_tokens_map.json", - "spiece.model", "tokenizer.json", - "tokenizer.model", "tokenizer_config.json", - "vocab.json", - "vocab.txt", ] logger = logging.get_logger(__name__) From e4b49a81ec2b1b09cebcba27597028f0844302bd Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Thu, 2 Jul 2026 17:13:24 +0000 Subject: [PATCH 6/6] Use transformers-style weight naming in flat-layout tests, add sharded test Co-Authored-By: Claude Fable 5 --- tests/pipelines/test_pipeline_utils.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/pipelines/test_pipeline_utils.py b/tests/pipelines/test_pipeline_utils.py index 3960e3b173d9..e0c73b96b8f9 100644 --- a/tests/pipelines/test_pipeline_utils.py +++ b/tests/pipelines/test_pipeline_utils.py @@ -230,22 +230,32 @@ def test_diffusers_is_compatible_no_components_safetensors_only_variants(self): ] self.assertTrue(is_safetensors_compatible(filenames, variant="fp16")) - def test_diffusers_is_compatible_weightless_subfolder(self): - # transformers-style flat layout: safetensors weights at the root + a weight-less subfolder (scheduler/) + def test_transformers_is_compatible_weightless_subfolder(self): + # transformers-style flat layout: transformers-named weights at the root + a weight-less subfolder filenames = [ - "diffusion_pytorch_model.safetensors", + "model.safetensors", "scheduler/scheduler_config.json", ] self.assertTrue(is_safetensors_compatible(filenames)) - def test_diffusers_is_not_compatible_weightless_subfolder(self): + def test_transformers_is_not_compatible_weightless_subfolder(self): # same flat layout but only .bin weights at the root -> not safetensors compatible filenames = [ - "diffusion_pytorch_model.bin", + "pytorch_model.bin", "scheduler/scheduler_config.json", ] self.assertFalse(is_safetensors_compatible(filenames)) + def test_transformers_is_compatible_sharded_root_weights(self): + # sharded transformers-style weights at the repo root (e.g. DiffusionGemma's model-00001-of-00011.safetensors) + filenames = [ + "model-00001-of-00002.safetensors", + "model-00002-of-00002.safetensors", + "model.safetensors.index.json", + "scheduler/scheduler_config.json", + ] + self.assertTrue(is_safetensors_compatible(filenames)) + def test_is_compatible_mixed_variants(self): filenames = [ "unet/diffusion_pytorch_model.fp16.safetensors",