Skip to content

Commit 4fb24e5

Browse files
committed
gh-94345: Fix dataclasses.asdict()/astuple() crash on circular references
asdict() and astuple() recursed into nested dataclasses and containers without tracking which objects were already being processed, so a circular reference caused unbounded recursion that ended in a RecursionError (or crashed the interpreter on a release build). The fix tracks the ids of the objects on the current recursion path and raises ValueError("Circular reference detected") -- matching json.dumps() -- when one of them is revisited. Objects that are merely shared (referenced more than once without forming a cycle) are still converted normally.
1 parent 30aeeb3 commit 4fb24e5

4 files changed

Lines changed: 154 additions & 24 deletions

File tree

Doc/library/dataclasses.rst

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,12 @@ Module contents
409409
{field.name: getattr(obj, field.name) for field in fields(obj)}
410410

411411
:func:`!asdict` raises :exc:`TypeError` if *obj* is not a dataclass
412-
instance.
412+
instance. It raises :exc:`ValueError` if *obj* contains a circular
413+
reference.
414+
415+
.. versionchanged:: next
416+
A circular reference now raises :exc:`ValueError` instead of
417+
:exc:`RecursionError`.
413418

414419
.. function:: astuple(obj, *, tuple_factory=tuple)
415420

@@ -429,7 +434,12 @@ Module contents
429434
tuple(getattr(obj, field.name) for field in dataclasses.fields(obj))
430435

431436
:func:`!astuple` raises :exc:`TypeError` if *obj* is not a dataclass
432-
instance.
437+
instance. It raises :exc:`ValueError` if *obj* contains a circular
438+
reference.
439+
440+
.. versionchanged:: next
441+
A circular reference now raises :exc:`ValueError` instead of
442+
:exc:`RecursionError`.
433443

434444
.. function:: make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False, match_args=True, kw_only=False, slots=False, weakref_slot=False, module=None, qualname=None, decorator=dataclass)
435445

Lib/dataclasses.py

Lines changed: 46 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1510,35 +1510,47 @@ class C:
15101510
"""
15111511
if not _is_dataclass_instance(obj):
15121512
raise TypeError("asdict() should be called on dataclass instances")
1513-
return _asdict_inner(obj, dict_factory)
1513+
return _asdict_inner(obj, dict_factory, set())
15141514

15151515

1516-
def _asdict_inner(obj, dict_factory):
1516+
def _asdict_inner(obj, dict_factory, seen):
15171517
obj_type = type(obj)
15181518
if obj_type in _ATOMIC_TYPES:
15191519
return obj
1520-
elif hasattr(obj_type, _FIELDS):
1520+
# Guard against circular references, which would otherwise recurse until
1521+
# a RecursionError (or a crash on release builds). gh-94345
1522+
if id(obj) in seen:
1523+
raise ValueError("Circular reference detected")
1524+
seen.add(id(obj))
1525+
try:
1526+
return _asdict_inner_recurse(obj, obj_type, dict_factory, seen)
1527+
finally:
1528+
seen.discard(id(obj))
1529+
1530+
1531+
def _asdict_inner_recurse(obj, obj_type, dict_factory, seen):
1532+
if hasattr(obj_type, _FIELDS):
15211533
# dataclass instance: fast path for the common case
15221534
if dict_factory is dict:
15231535
return {
1524-
f.name: _asdict_inner(getattr(obj, f.name), dict)
1536+
f.name: _asdict_inner(getattr(obj, f.name), dict, seen)
15251537
for f in fields(obj)
15261538
}
15271539
else:
15281540
return dict_factory([
1529-
(f.name, _asdict_inner(getattr(obj, f.name), dict_factory))
1541+
(f.name, _asdict_inner(getattr(obj, f.name), dict_factory, seen))
15301542
for f in fields(obj)
15311543
])
15321544
# handle the builtin types first for speed; subclasses handled below
15331545
elif obj_type is list:
1534-
return [_asdict_inner(v, dict_factory) for v in obj]
1546+
return [_asdict_inner(v, dict_factory, seen) for v in obj]
15351547
elif obj_type is dict:
15361548
return {
1537-
_asdict_inner(k, dict_factory): _asdict_inner(v, dict_factory)
1549+
_asdict_inner(k, dict_factory, seen): _asdict_inner(v, dict_factory, seen)
15381550
for k, v in obj.items()
15391551
}
15401552
elif obj_type is tuple:
1541-
return tuple([_asdict_inner(v, dict_factory) for v in obj])
1553+
return tuple([_asdict_inner(v, dict_factory, seen) for v in obj])
15421554
elif issubclass(obj_type, tuple):
15431555
if hasattr(obj, '_fields'):
15441556
# obj is a namedtuple. Recurse into it, but the returned
@@ -1559,24 +1571,24 @@ def _asdict_inner(obj, dict_factory):
15591571
# dict. Note that if we returned dicts here instead of
15601572
# namedtuples, we could no longer call asdict() on a data
15611573
# structure where a namedtuple was used as a dict key.
1562-
return obj_type(*[_asdict_inner(v, dict_factory) for v in obj])
1574+
return obj_type(*[_asdict_inner(v, dict_factory, seen) for v in obj])
15631575
else:
1564-
return obj_type(_asdict_inner(v, dict_factory) for v in obj)
1576+
return obj_type(_asdict_inner(v, dict_factory, seen) for v in obj)
15651577
elif issubclass(obj_type, (dict, frozendict)):
15661578
if hasattr(obj_type, 'default_factory'):
15671579
# obj is a defaultdict, which has a different constructor from
15681580
# dict as it requires the default_factory as its first arg.
15691581
result = obj_type(obj.default_factory)
15701582
for k, v in obj.items():
1571-
result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory)
1583+
result[_asdict_inner(k, dict_factory, seen)] = _asdict_inner(v, dict_factory, seen)
15721584
return result
1573-
return obj_type((_asdict_inner(k, dict_factory),
1574-
_asdict_inner(v, dict_factory))
1585+
return obj_type((_asdict_inner(k, dict_factory, seen),
1586+
_asdict_inner(v, dict_factory, seen))
15751587
for k, v in obj.items())
15761588
elif issubclass(obj_type, list):
15771589
# Assume we can create an object of this type by passing in a
15781590
# generator
1579-
return obj_type(_asdict_inner(v, dict_factory) for v in obj)
1591+
return obj_type(_asdict_inner(v, dict_factory, seen) for v in obj)
15801592
else:
15811593
return copy.deepcopy(obj)
15821594

@@ -1603,15 +1615,27 @@ class C:
16031615

16041616
if not _is_dataclass_instance(obj):
16051617
raise TypeError("astuple() should be called on dataclass instances")
1606-
return _astuple_inner(obj, tuple_factory)
1618+
return _astuple_inner(obj, tuple_factory, set())
16071619

16081620

1609-
def _astuple_inner(obj, tuple_factory):
1621+
def _astuple_inner(obj, tuple_factory, seen):
16101622
if type(obj) in _ATOMIC_TYPES:
16111623
return obj
1612-
elif _is_dataclass_instance(obj):
1624+
# Guard against circular references, which would otherwise recurse until
1625+
# a RecursionError (or a crash on release builds). gh-94345
1626+
if id(obj) in seen:
1627+
raise ValueError("Circular reference detected")
1628+
seen.add(id(obj))
1629+
try:
1630+
return _astuple_inner_recurse(obj, tuple_factory, seen)
1631+
finally:
1632+
seen.discard(id(obj))
1633+
1634+
1635+
def _astuple_inner_recurse(obj, tuple_factory, seen):
1636+
if _is_dataclass_instance(obj):
16131637
return tuple_factory([
1614-
_astuple_inner(getattr(obj, f.name), tuple_factory)
1638+
_astuple_inner(getattr(obj, f.name), tuple_factory, seen)
16151639
for f in fields(obj)
16161640
])
16171641
elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
@@ -1621,22 +1645,22 @@ def _astuple_inner(obj, tuple_factory):
16211645
# treated (see below), but we just need to create them
16221646
# differently because a namedtuple's __init__ needs to be
16231647
# called differently (see bpo-34363).
1624-
return type(obj)(*[_astuple_inner(v, tuple_factory) for v in obj])
1648+
return type(obj)(*[_astuple_inner(v, tuple_factory, seen) for v in obj])
16251649
elif isinstance(obj, (list, tuple)):
16261650
# Assume we can create an object of this type by passing in a
16271651
# generator (which is not true for namedtuples, handled
16281652
# above).
1629-
return type(obj)(_astuple_inner(v, tuple_factory) for v in obj)
1653+
return type(obj)(_astuple_inner(v, tuple_factory, seen) for v in obj)
16301654
elif isinstance(obj, (dict, frozendict)):
16311655
obj_type = type(obj)
16321656
if hasattr(obj_type, 'default_factory'):
16331657
# obj is a defaultdict, which has a different constructor from
16341658
# dict as it requires the default_factory as its first arg.
16351659
result = obj_type(getattr(obj, 'default_factory'))
16361660
for k, v in obj.items():
1637-
result[_astuple_inner(k, tuple_factory)] = _astuple_inner(v, tuple_factory)
1661+
result[_astuple_inner(k, tuple_factory, seen)] = _astuple_inner(v, tuple_factory, seen)
16381662
return result
1639-
return obj_type((_astuple_inner(k, tuple_factory), _astuple_inner(v, tuple_factory))
1663+
return obj_type((_astuple_inner(k, tuple_factory, seen), _astuple_inner(v, tuple_factory, seen))
16401664
for k, v in obj.items())
16411665
else:
16421666
return copy.deepcopy(obj)

Lib/test/test_dataclasses/__init__.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1976,6 +1976,98 @@ class C:
19761976
self.assertEqual(t, ({"x": [12]},))
19771977
self.assertTrue(t[0] is not dd) # make sure defaultdict is copied
19781978

1979+
def test_helper_asdict_circular_reference(self):
1980+
# gh-94345: asdict() must raise a clear error on a circular reference
1981+
# instead of recursing until a RecursionError (or crashing).
1982+
@dataclass
1983+
class C:
1984+
name: str
1985+
link: object = None
1986+
items: list = field(default_factory=list)
1987+
1988+
# Direct self reference.
1989+
c = C('c')
1990+
c.link = c
1991+
with self.assertRaisesRegex(ValueError, 'Circular reference detected'):
1992+
asdict(c)
1993+
# Indirect cycle through another dataclass.
1994+
a = C('a')
1995+
b = C('b')
1996+
a.link = b
1997+
b.link = a
1998+
with self.assertRaisesRegex(ValueError, 'Circular reference detected'):
1999+
asdict(a)
2000+
# Cycle through a list field.
2001+
d = C('d')
2002+
d.items.append(d)
2003+
with self.assertRaisesRegex(ValueError, 'Circular reference detected'):
2004+
asdict(d)
2005+
# Cycle through a dict field.
2006+
e = C('e')
2007+
e.link = {'self': e}
2008+
with self.assertRaisesRegex(ValueError, 'Circular reference detected'):
2009+
asdict(e)
2010+
2011+
def test_helper_asdict_shared_reference_is_not_circular(self):
2012+
# gh-94345: an object referenced more than once without forming a
2013+
# cycle (a DAG) must still be converted successfully.
2014+
@dataclass
2015+
class Inner:
2016+
value: int
2017+
@dataclass
2018+
class Outer:
2019+
left: object
2020+
right: object
2021+
2022+
shared = Inner(1)
2023+
o = Outer(left=shared, right=shared)
2024+
self.assertEqual(asdict(o),
2025+
{'left': {'value': 1}, 'right': {'value': 1}})
2026+
# A shared built-in container referenced twice is fine too.
2027+
shared_list = [1, 2]
2028+
o2 = Outer(left=shared_list, right=shared_list)
2029+
self.assertEqual(asdict(o2), {'left': [1, 2], 'right': [1, 2]})
2030+
2031+
def test_helper_astuple_circular_reference(self):
2032+
# gh-94345: see test_helper_asdict_circular_reference.
2033+
@dataclass
2034+
class C:
2035+
name: str
2036+
link: object = None
2037+
items: list = field(default_factory=list)
2038+
2039+
c = C('c')
2040+
c.link = c
2041+
with self.assertRaisesRegex(ValueError, 'Circular reference detected'):
2042+
astuple(c)
2043+
a = C('a')
2044+
b = C('b')
2045+
a.link = b
2046+
b.link = a
2047+
with self.assertRaisesRegex(ValueError, 'Circular reference detected'):
2048+
astuple(a)
2049+
d = C('d')
2050+
d.items.append(d)
2051+
with self.assertRaisesRegex(ValueError, 'Circular reference detected'):
2052+
astuple(d)
2053+
2054+
def test_helper_astuple_shared_reference_is_not_circular(self):
2055+
# gh-94345: a DAG must still be converted successfully.
2056+
@dataclass
2057+
class Inner:
2058+
value: int
2059+
@dataclass
2060+
class Outer:
2061+
left: object
2062+
right: object
2063+
2064+
shared = Inner(1)
2065+
o = Outer(left=shared, right=shared)
2066+
self.assertEqual(astuple(o), ((1,), (1,)))
2067+
shared_list = [1, 2]
2068+
o2 = Outer(left=shared_list, right=shared_list)
2069+
self.assertEqual(astuple(o2), ([1, 2], [1, 2]))
2070+
19792071
def test_dynamic_class_creation(self):
19802072
cls_dict = {'__annotations__': {'x': int, 'y': int},
19812073
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
:func:`dataclasses.asdict` and :func:`dataclasses.astuple` now raise
2+
:exc:`ValueError` when the dataclass instance contains a circular reference,
3+
instead of recursing until a :exc:`RecursionError` (or crashing on a release
4+
build).

0 commit comments

Comments
 (0)