class Upsample2DBlockTests(unittest.TestCase):
    def test_upsample_default(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 32, 32)
        upsample = Upsample2D(channels=32, use_conv=False)
        with torch.no_grad():
            upsampled = upsample(sample)

        assert upsampled.shape == (1, 32, 64, 64)
        output_slice = upsampled[0, -1, -3:, -3:]
        expected_slice = torch.tensor([-0.2173, -1.2079, -1.2079, 0.2952, 1.1254, 1.1254, 0.2952, 1.1254, 1.1254])
        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)

    @require_torch_version_greater_equal("2.1")
    def test_upsample_bfloat16(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 32, 32).to(torch.bfloat16)
        upsample = Upsample2D(channels=32, use_conv=False)
        with torch.no_grad():
            upsampled = upsample(sample)

        assert upsampled.shape == (1, 32, 64, 64)
        output_slice = upsampled[0, -1, -3:, -3:]
        expected_slice = torch.tensor(
            [-0.2173, -1.2079, -1.2079, 0.2952, 1.1254, 1.1254, 0.2952, 1.1254, 1.1254], dtype=torch.bfloat16
        )
        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)

    def test_upsample_with_conv(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 32, 32)
        upsample = Upsample2D(channels=32, use_conv=True)
        with torch.no_grad():
            upsampled = upsample(sample)

        assert upsampled.shape == (1, 32, 64, 64)
        output_slice = upsampled[0, -1, -3:, -3:]
        expected_slice = torch.tensor([0.7145, 1.3773, 0.3492, 0.8448, 1.0839, -0.3341, 0.5956, 0.1250, -0.4841])
        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)

    def test_upsample_with_conv_out_dim(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 32, 32)
        upsample = Upsample2D(channels=32, use_conv=True, out_channels=64)
        with torch.no_grad():
            upsampled = upsample(sample)

        assert upsampled.shape == (1, 64, 64, 64)
        output_slice = upsampled[0, -1, -3:, -3:]
        expected_slice = torch.tensor([0.2703, 0.1656, -0.2538, -0.0553, -0.2984, 0.1044, 0.1155, 0.2579, 0.7755])
        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)

    def test_upsample_with_transpose(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 32, 32)
        upsample = Upsample2D(channels=32, use_conv=False, use_conv_transpose=True)
        with torch.no_grad():
            upsampled = upsample(sample)

        assert upsampled.shape == (1, 32, 64, 64)
        output_slice = upsampled[0, -1, -3:, -3:]
        expected_slice = torch.tensor([-0.3028, -0.1582, 0.0071, 0.0350, -0.4799, -0.1139, 0.1056, -0.1153, -0.1046])
        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)

class Downsample2DBlockTests(unittest.TestCase):
    def test_downsample_default(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 64, 64)
        downsample = Downsample2D(channels=32, use_conv=False)
        with torch.no_grad():
            downsampled = downsample(sample)

        assert downsampled.shape == (1, 32, 32, 32)
        output_slice = downsampled[0, -1, -3:, -3:]
        expected_slice = torch.tensor([-0.0513, -0.3889, 0.0640, 0.0836, -0.5460, -0.0341, -0.0169, -0.6967, 0.1179])
        max_diff = (output_slice.flatten() - expected_slice).abs().max().item()
        assert max_diff <= 1e-3

    def test_downsample_with_conv(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 64, 64)
        downsample = Downsample2D(channels=32, use_conv=True)
        with torch.no_grad():
            downsampled = downsample(sample)

        assert downsampled.shape == (1, 32, 32, 32)
        output_slice = downsampled[0, -1, -3:, -3:]
        expected_slice = torch.tensor(
            [0.9267, 0.5878, 0.3337, 1.2321, -0.1191, -0.3984, -0.7532, -0.0715, -0.3913],
        )
        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)

    def test_downsample_with_conv_pad1(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 64, 64)
        downsample = Downsample2D(channels=32, use_conv=True, padding=1)
        with torch.no_grad():
            downsampled = downsample(sample)

        assert downsampled.shape == (1, 32, 32, 32)
        output_slice = downsampled[0, -1, -3:, -3:]
        expected_slice = torch.tensor([0.9267, 0.5878, 0.3337, 1.2321, -0.1191, -0.3984, -0.7532, -0.0715, -0.3913])
        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)

    def test_downsample_with_conv_out_dim(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 64, 64)
        downsample = Downsample2D(channels=32, use_conv=True, out_channels=16)
        with torch.no_grad():
            downsampled = downsample(sample)

        assert downsampled.shape == (1, 16, 32, 32)
        output_slice = downsampled[0, -1, -3:, -3:]
        expected_slice = torch.tensor([-0.6586, 0.5985, 0.0721, 0.1256, -0.1492, 0.4436, -0.2544, 0.5021, 1.1522])
        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)

class ResnetBlock2DTests(unittest.TestCase):
    def test_resnet_default(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 64, 64).to(torch_device)
        temb = torch.randn(1, 128).to(torch_device)
        resnet_block = ResnetBlock2D(in_channels=32, temb_channels=128).to(torch_device)
        with torch.no_grad():
            output_tensor = resnet_block(sample, temb)

        assert output_tensor.shape == (1, 32, 64, 64)
        output_slice = output_tensor[0, -1, -3:, -3:]
        expected_slice = torch.tensor(
            [-1.9010, -0.2974, -0.8245, -1.3533, 0.8742, -0.9645, -2.0584, 1.3387, -0.4746], device=torch_device
        )
        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)

    def test_resnet_with_use_in_shortcut(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 64, 64).to(torch_device)
        temb = torch.randn(1, 128).to(torch_device)
        resnet_block = ResnetBlock2D(in_channels=32, temb_channels=128, use_in_shortcut=True).to(torch_device)
        with torch.no_grad():
            output_tensor = resnet_block(sample, temb)

        assert output_tensor.shape == (1, 32, 64, 64)
        output_slice = output_tensor[0, -1, -3:, -3:]
        expected_slice = torch.tensor(
            [0.2226, -1.0791, -0.1629, 0.3659, -0.2889, -1.2376, 0.0582, 0.9206, 0.0044], device=torch_device
        )
        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)

    def test_resnet_up(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 64, 64).to(torch_device)
        temb = torch.randn(1, 128).to(torch_device)
        resnet_block = ResnetBlock2D(in_channels=32, temb_channels=128, up=True).to(torch_device)
        with torch.no_grad():
            output_tensor = resnet_block(sample, temb)

        assert output_tensor.shape == (1, 32, 128, 128)
        output_slice = output_tensor[0, -1, -3:, -3:]
        expected_slice = torch.tensor(
            [1.2130, -0.8753, -0.9027, 1.5783, -0.5362, -0.5001, 1.0726, -0.7732, -0.4182], device=torch_device
        )
        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)

    def test_resnet_down(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 64, 64).to(torch_device)
        temb = torch.randn(1, 128).to(torch_device)
        resnet_block = ResnetBlock2D(in_channels=32, temb_channels=128, down=True).to(torch_device)
        with torch.no_grad():
            output_tensor = resnet_block(sample, temb)

        assert output_tensor.shape == (1, 32, 32, 32)
        output_slice = output_tensor[0, -1, -3:, -3:]
        expected_slice = torch.tensor(
            [-0.3002, -0.7135, 0.1359, 0.0561, -0.7935, 0.0113, -0.1766, -0.6714, -0.0436], device=torch_device
        )
        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)

    def test_resnet_with_kernel_fir(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 64, 64).to(torch_device)
        temb = torch.randn(1, 128).to(torch_device)
        resnet_block = ResnetBlock2D(in_channels=32, temb_channels=128, kernel="fir", down=True).to(torch_device)
        with torch.no_grad():
            output_tensor = resnet_block(sample, temb)

        assert output_tensor.shape == (1, 32, 32, 32)
        output_slice = output_tensor[0, -1, -3:, -3:]
        expected_slice = torch.tensor(
            [-0.0934, -0.5729, 0.0909, -0.2710, -0.5044, 0.0243, -0.0665, -0.5267, -0.3136], device=torch_device
        )
        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)

    def test_resnet_with_kernel_sde_vp(self):
        torch.manual_seed(0)
        sample = torch.randn(1, 32, 64, 64).to(torch_device)
        temb = torch.randn(1, 128).to(torch_device)
        resnet_block = ResnetBlock2D(in_channels=32, temb_channels=128, kernel="sde_vp", down=True).to(torch_device)
        with torch.no_grad():
            output_tensor = resnet_block(sample, temb)

        assert output_tensor.shape == (1, 32, 32, 32)
        output_slice = output_tensor[0, -1, -3:, -3:]
        expected_slice = torch.tensor(
            [-0.3002, -0.7135, 0.1359, 0.0561, -0.7935, 0.0113, -0.1766, -0.6714, -0.0436], device=torch_device
        )
        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)

class Transformer2DModelTests(unittest.TestCase):
    def test_spatial_transformer_default(self):
        torch.manual_seed(0)
        backend_manual_seed(torch_device, 0)

model_infrastructure model/pipeline review

Commit tested: 0f1abc4ae8b0eb2a3b40e82a310507281144c423

Review performed against the repository review rules. All requested model_infrastructure files were reviewed.

Duplicate-search status: searched existing GitHub Issues and PRs for model_infrastructure, the affected file/function names, and the specific failures below. No direct duplicates were found. Related but not duplicate: huggingface/diffusers#12409 and #12533, around distributed/context-parallel availability.

Test coverage status: fast/unit coverage exists for parts of AutoModel, layer helpers, cache utilities, attention backends, and parallelism helpers, but the slow/integration coverage gaps listed in Issue 9 remain.
Issue 1: `AutoModel.from_pretrained` rejects `PathLike` pipeline roots

Affected code: diffusers/src/diffusers/models/auto_model.py, lines 343 to 345 at 0f1abc4

Problem: `from_pretrained()` accepts `Union[str, os.PathLike]`, but the model-index path builds `_diffusers_load_id` with `"|".join(parts)` while `parts` may contain a `Path` object. Passing a `Path` root with `subfolder` fails before loading.

Impact: Documented/local loading behavior breaks for users who pass `pathlib.Path`, and the failure is a low-level `TypeError` rather than a loading error.

Reproduction:

Relevant precedent: other loading paths normalize path-like inputs before string operations.

Suggested fix:
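A minimal sketch of the normalization this points at; `build_load_id` is an illustrative stand-in for the code that assembles `_diffusers_load_id`, not the actual function in `auto_model.py`:

```python
import os
from typing import Optional, Union


def build_load_id(root: Union[str, os.PathLike], subfolder: Optional[str] = None) -> str:
    # Illustrative: normalize a possible PathLike root before any string joining,
    # so "|".join(...) only ever sees plain strings.
    parts = [os.fspath(root)]
    if subfolder:
        parts.append(subfolder)
    return "|".join(parts)


# build_load_id(pathlib.Path("/tmp/tiny-pipe"), "unet") -> "/tmp/tiny-pipe|unet"
```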
Issue 2: `AutoModel.from_pretrained` leaks `config_name` after model-index loading

Affected code: diffusers/src/diffusers/models/auto_model.py, lines 278 to 293 at 0f1abc4

Problem: `from_pretrained()` mutates the class attribute `cls.config_name` to `"model_index.json"` and does not restore it after a successful model-index load. A later `AutoModel.from_config(model_dir)` then looks for `model_index.json` inside a plain model directory instead of `config.json`.

Impact: One successful `AutoModel.from_pretrained()` call can change later behavior process-wide.

Reproduction:

Relevant precedent: config name overrides should be local to the load attempt, not stored on the public class.

Suggested fix:
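A minimal sketch of scoping the override to one load attempt; `config_name_override` is an illustrative helper rather than existing diffusers API, and the usage comment assumes the pipeline-level config is read through the class's own `load_config`:

```python
from contextlib import contextmanager


@contextmanager
def config_name_override(klass, temporary_name):
    # Illustrative: apply a config_name override for one load attempt only.
    original = klass.config_name
    klass.config_name = temporary_name
    try:
        yield
    finally:
        klass.config_name = original  # restored even if loading raises


# Inside from_pretrained, roughly:
#     with config_name_override(cls, "model_index.json"):
#         config = cls.load_config(pretrained_model_or_path, **load_config_kwargs)
```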
Issue 3: `CacheMixin.cache_context` leaves stale hook state after exceptions

Affected code: diffusers/src/diffusers/models/cache_utils.py, lines 154 to 164 at 0f1abc4

Problem: `cache_context()` clears the hook context only after the `yield`. If the wrapped forward pass raises, `_current_context` remains set on stateful hooks.

Impact: A failed denoising step can leak cached/stateful hook state into later calls, and `get_state()` can incorrectly succeed outside a context.

Reproduction:

Relevant precedent: context managers that mutate global or hook state should restore that state in `finally`.

Suggested fix:
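A minimal sketch of the try/finally shape described here; the standalone helper and its arguments are illustrative, only the `_current_context` attribute mirrors the code quoted above:

```python
from contextlib import contextmanager


@contextmanager
def cache_context(hooks, name):
    # Illustrative: set the context name on stateful hooks and always clear it,
    # even if the wrapped forward pass raises.
    for hook in hooks:
        hook._current_context = name
    try:
        yield
    finally:
        for hook in hooks:
            hook._current_context = None
```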
Issue 4: FIR up/downsampling fails on `bfloat16` and `float16` inputs

Affected code:
- diffusers/src/diffusers/models/downsampling.py, lines 217 to 239 and 386 to 397 at 0f1abc4
- diffusers/src/diffusers/models/upsampling.py, lines 261 to 312 and 502 to 513 at 0f1abc4

Problem: The FIR helpers create kernels as default `float32` tensors and pass them to convolution against low-precision hidden states.

Impact: Low-precision model execution can fail in FIR downsample/upsample paths with a dtype mismatch.

Reproduction:

Relevant precedent: `Upsample2D` already has low-precision test coverage; the FIR helper/module paths need equivalent dtype handling.

Suggested fix:
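A minimal sketch of the dtype handling meant here, assuming the FIR kernel is materialized from a short list of taps right before the convolution; the function name is illustrative:

```python
import torch


def build_fir_kernel(taps, hidden_states):
    # Illustrative: build the separable FIR kernel in the dtype/device of the input
    # so fp16/bf16 forward passes do not hit a conv dtype mismatch.
    kernel_1d = torch.tensor(taps, dtype=hidden_states.dtype, device=hidden_states.device)
    kernel_2d = torch.outer(kernel_1d, kernel_1d)
    return kernel_2d / kernel_2d.sum()


# build_fir_kernel([1.0, 3.0, 3.0, 1.0], torch.randn(1, 32, 8, 8, dtype=torch.bfloat16))
```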
Issue 5: `enable_parallelism` uses the wrong distributed guard

Affected code: diffusers/src/diffusers/models/modeling_utils.py, lines 1595 to 1598 at 0f1abc4

Problem: The guard uses `and` instead of `or`: `not is_available() and not is_initialized()`. When distributed is unavailable, this may call `is_initialized()` anyway; when distributed is available but uninitialized, the guard can fail to raise.

Impact: Users get backend-specific errors instead of the intended clear `RuntimeError`, or parallelism proceeds before `torch.distributed` is initialized.

Reproduction:

Relevant precedent: distributed APIs normally require both availability and initialization before model wrapping.

Suggested fix:
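A minimal sketch of the intended guard; the error message wording is illustrative:

```python
import torch.distributed as dist

# Raise a clear error unless torch.distributed is both available and already initialized.
if not dist.is_available() or not dist.is_initialized():
    raise RuntimeError(
        "torch.distributed must be available and initialized before enabling parallelism."
    )
```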
Issue 6: Legacy `Attention.set_use_xla_flash_attention` checks the function object instead of calling it

Affected code: diffusers/src/diffusers/models/attention_processor.py, lines 311 to 342 at 0f1abc4

Problem: The method checks `if is_torch_xla_available:` instead of `if is_torch_xla_available():`, so environments without `torch_xla` enter the XLA version checks and can raise `InvalidVersion`. The method also raises plain strings in two branches.

Impact: Users enabling XLA flash attention get confusing exceptions instead of a clear dependency/version error.

Reproduction:

Relevant precedent: `AttentionModuleMixin.set_use_xla_flash_attention()` in src/diffusers/models/attention.py calls `is_torch_xla_available()` and raises `ImportError`.

Suggested fix:
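A minimal sketch of the check-and-raise shape used by the newer mixin; the standalone function is illustrative and only assumes `is_torch_xla_available` is importable from `diffusers.utils`:

```python
from diffusers.utils import is_torch_xla_available


def ensure_xla_flash_attention_available(use_xla_flash_attention: bool) -> None:
    # Illustrative: call the availability helper (note the parentheses) and raise a
    # real exception type instead of a bare string.
    if use_xla_flash_attention and not is_torch_xla_available():
        raise ImportError("torch_xla is not available, so XLA flash attention cannot be enabled.")
```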
Issue 7: `FlaxModelMixin.from_pretrained(config=...)` can reference `unused_kwargs` before assignment

Affected code: diffusers/src/diffusers/models/modeling_flax_utils.py, lines 307 to 322 at 0f1abc4

Problem: `unused_kwargs` is assigned only in the `config is None` branch, but it is later used unconditionally. Passing a preloaded config can therefore raise `UnboundLocalError`. This was statically verified; the local `.venv` does not include Flax.

Impact: Callers that pass an already loaded Flax config cannot reliably use `from_pretrained(config=...)`.

Reproduction:

Relevant precedent: the PyTorch loading path preserves extra kwargs regardless of whether the config is loaded internally or supplied by the caller.

Suggested fix:
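A minimal sketch of defining `unused_kwargs` on both branches; the wrapper function is illustrative and assumes the config is read via `load_config(..., return_unused_kwargs=True)`:

```python
def resolve_config(cls, pretrained_path, config=None, **kwargs):
    # Illustrative: make sure unused_kwargs exists whether or not a config was passed in.
    if config is None:
        config, unused_kwargs = cls.load_config(pretrained_path, return_unused_kwargs=True, **kwargs)
    else:
        unused_kwargs = kwargs  # a preloaded config consumes none of the extra kwargs
    return config, unused_kwargs
```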
Issue 8: Sinusoidal embedding helper defaults to `float64` on non-MPS devices

Affected code: diffusers/src/diffusers/models/embeddings.py, lines 321 to 356 at 0f1abc4

Problem: For `output_type="pt"`, the helper defaults `dtype` to `torch.float64` unless the device is MPS. The review rules call out NPU float64 limitations, but NPU takes the same non-MPS branch.

Impact: Embedding creation can fail on NPU, and CPU/GPU callers get an unexpectedly high-precision tensor unless they override `dtype`.

Reproduction:

Relevant precedent: NPU-safe code paths in the repository avoid implicit float64 tensors.

Suggested fix:
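A minimal sketch of a dtype default that avoids implicit float64 regardless of device; the helper name is illustrative:

```python
from typing import Optional

import torch


def default_embedding_dtype(dtype: Optional[torch.dtype] = None) -> torch.dtype:
    # Illustrative: honor an explicit request, otherwise fall back to float32,
    # which is safe on CPU, CUDA, MPS, and NPU alike.
    return dtype if dtype is not None else torch.float32
```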
Issue 9: Slow/integration coverage is missing for shared model infrastructure regressions

Affected code:
- diffusers/tests/models/test_models_auto.py, lines 15 to 82 at 0f1abc4
- diffusers/tests/models/test_layers_utils.py, lines 112 to 327 at 0f1abc4
- diffusers/tests/models/testing_utils/cache.py, lines 194 to 221 at 0f1abc4

Problem: Fast coverage exists, but it does not cover the integration-style failures found above: AutoModel `PathLike`/state leakage across calls, cache cleanup after exceptions, FIR low-precision paths, and unavailable/uninitialized distributed backends. No dedicated slow test covers these shared infrastructure behaviors through a tiny pipeline/model load.

Impact: Regressions in shared model infrastructure can affect many model and pipeline families without being caught by family-specific fast tests.

Reproduction:

Relevant precedent: other model/pipeline families combine focused fast tests with slow tests that exercise actual loading/runtime behavior.

Suggested fix: Add fast regression tests for Issues 1-8. Add at least one slow/integration test using a tiny Hub fixture or saved tiny local pipeline to exercise `AutoModel.from_pretrained`, cache/offload/attention setup, and shared dtype/device behavior through public APIs.
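A minimal sketch of the kind of slow test described above; the checkpoint id is a placeholder rather than a vetted fixture, and the `subfolder="unet"` argument assumes the tiny pipeline contains a unet component:

```python
import pathlib
import tempfile
import unittest

import torch

from diffusers import AutoModel, DiffusionPipeline
from diffusers.utils.testing_utils import slow


@slow
class ModelInfrastructureIntegrationTests(unittest.TestCase):
    # Placeholder: point this at an agreed-upon tiny pipeline fixture on the Hub.
    TINY_PIPELINE_ID = "<org>/tiny-pipeline-fixture"

    def test_automodel_pathlike_then_plain_reload(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            DiffusionPipeline.from_pretrained(self.TINY_PIPELINE_ID).save_pretrained(tmpdir)

            # Issue 1: a pathlib.Path root combined with subfolder should load.
            unet = AutoModel.from_pretrained(pathlib.Path(tmpdir), subfolder="unet")
            self.assertIsInstance(unet, torch.nn.Module)

            # Issue 2: a later plain model-directory load must still resolve config.json.
            unet_again = AutoModel.from_pretrained(str(pathlib.Path(tmpdir) / "unet"))
            self.assertIsInstance(unet_again, torch.nn.Module)
```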