Skip to content

Commit 2e37d83

Browse files
gh-148653: Fix some marshal errors related to recursive immutable objects (GH-148698)
Forbid marshalling recursive code, slice and frozendict objects which cannot be correctly unmarshalled. Reject invalid marshal data produced by marshalling recursive frozendict objects which was previously incorrectly unmarshalled. Add multiple tests for recursive data structures.
1 parent 92164dc commit 2e37d83

File tree

3 files changed

+182
-16
lines changed

3 files changed

+182
-16
lines changed

Lib/test/test_marshal.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,133 @@ def test_recursion_limit(self):
317317
last.append([0])
318318
self.assertRaises(ValueError, marshal.dumps, head)
319319

320+
def test_reference_loop_list(self):
321+
a = []
322+
a.append(a)
323+
for v in range(3):
324+
self.assertRaises(ValueError, marshal.dumps, a, v)
325+
for v in range(3, marshal.version + 1):
326+
d = marshal.dumps(a, v)
327+
b = marshal.loads(d)
328+
self.assertIsInstance(b, list)
329+
self.assertIs(b[0], b)
330+
331+
def test_reference_loop_dict(self):
332+
a = {}
333+
a[None] = a
334+
for v in range(3):
335+
self.assertRaises(ValueError, marshal.dumps, a, v)
336+
for v in range(3, marshal.version + 1):
337+
d = marshal.dumps(a, v)
338+
b = marshal.loads(d)
339+
self.assertIsInstance(b, dict)
340+
self.assertIs(b[None], b)
341+
342+
def test_reference_loop_tuple(self):
343+
a = ([],)
344+
a[0].append(a)
345+
for v in range(3):
346+
self.assertRaises(ValueError, marshal.dumps, a, v)
347+
for v in range(3, marshal.version + 1):
348+
d = marshal.dumps(a, v)
349+
b = marshal.loads(d)
350+
self.assertIsInstance(b, tuple)
351+
self.assertIsInstance(b[0], list)
352+
self.assertIs(b[0][0], b)
353+
354+
def test_reference_loop_code(self):
355+
def f():
356+
return 1234.5
357+
code = f.__code__
358+
a = []
359+
code = code.replace(co_consts=code.co_consts + (a,))
360+
a.append(code)
361+
for v in range(marshal.version + 1):
362+
self.assertRaises(ValueError, marshal.dumps, code, v)
363+
364+
def test_reference_loop_slice(self):
365+
a = slice([], None)
366+
a.start.append(a)
367+
for v in range(marshal.version + 1):
368+
self.assertRaises(ValueError, marshal.dumps, a, v)
369+
370+
a = slice(None, [])
371+
a.stop.append(a)
372+
for v in range(marshal.version + 1):
373+
self.assertRaises(ValueError, marshal.dumps, a, v)
374+
375+
a = slice(None, None, [])
376+
a.step.append(a)
377+
for v in range(marshal.version + 1):
378+
self.assertRaises(ValueError, marshal.dumps, a, v)
379+
380+
def test_reference_loop_frozendict(self):
381+
a = frozendict({None: []})
382+
a[None].append(a)
383+
for v in range(marshal.version + 1):
384+
self.assertRaises(ValueError, marshal.dumps, a, v)
385+
386+
def test_loads_reference_loop_list(self):
387+
data = b'\xdb\x01\x00\x00\x00r\x00\x00\x00\x00' # [<R>]
388+
a = marshal.loads(data)
389+
self.assertIsInstance(a, list)
390+
self.assertIs(a[0], a)
391+
392+
def test_loads_reference_loop_dict(self):
393+
data = b'\xfbNr\x00\x00\x00\x000' # {None: <R>}
394+
a = marshal.loads(data)
395+
self.assertIsInstance(a, dict)
396+
self.assertIs(a[None], a)
397+
398+
def test_loads_abnormal_reference_loops(self):
399+
# Indirect self-references of tuples.
400+
data = b'\xa8\x01\x00\x00\x00[\x01\x00\x00\x00r\x00\x00\x00\x00' # ([<R>],)
401+
a = marshal.loads(data)
402+
self.assertIsInstance(a, tuple)
403+
self.assertIsInstance(a[0], list)
404+
self.assertIs(a[0][0], a)
405+
406+
data = b'\xa8\x01\x00\x00\x00{Nr\x00\x00\x00\x000' # ({None: <R>},)
407+
a = marshal.loads(data)
408+
self.assertIsInstance(a, tuple)
409+
self.assertIsInstance(a[0], dict)
410+
self.assertIs(a[0][None], a)
411+
412+
# Direct self-reference which cannot be created in Python.
413+
data = b'\xa8\x01\x00\x00\x00r\x00\x00\x00\x00' # (<R>,)
414+
a = marshal.loads(data)
415+
self.assertIsInstance(a, tuple)
416+
self.assertIs(a[0], a)
417+
418+
# Direct self-references which cannot be created in Python
419+
# because of unhashability.
420+
data = b'\xfbr\x00\x00\x00\x00N0' # {<R>: None}
421+
self.assertRaises(TypeError, marshal.loads, data)
422+
data = b'\xbc\x01\x00\x00\x00r\x00\x00\x00\x00' # {<R>}
423+
self.assertRaises(TypeError, marshal.loads, data)
424+
425+
for data in [
426+
# Indirect self-references of immutable objects.
427+
b'\xba[\x01\x00\x00\x00r\x00\x00\x00\x00NN', # slice([<R>], None)
428+
b'\xbaN[\x01\x00\x00\x00r\x00\x00\x00\x00N', # slice(None, [<R>])
429+
b'\xbaNN[\x01\x00\x00\x00r\x00\x00\x00\x00', # slice(None, None, [<R>])
430+
b'\xba{Nr\x00\x00\x00\x000NN', # slice({None: <R>}, None)
431+
b'\xbaN{Nr\x00\x00\x00\x000N', # slice(None, {None: <R>})
432+
b'\xbaNN{Nr\x00\x00\x00\x000', # slice(None, None, {None: <R>})
433+
b'\xfdN[\x01\x00\x00\x00r\x00\x00\x00\x000', # frozendict({None: [<R>]})
434+
b'\xfdN{Nr\x00\x00\x00\x0000', # frozendict({None: {None: <R>})
435+
436+
# Direct self-references which cannot be created in Python.
437+
b'\xbe\x01\x00\x00\x00r\x00\x00\x00\x00', # frozenset({<R>})
438+
b'\xfdNr\x00\x00\x00\x000', # frozendict({None: <R>})
439+
b'\xfdr\x00\x00\x00\x00N0', # frozendict({<R>: None})
440+
b'\xbar\x00\x00\x00\x00NN', # slice(<R>, None)
441+
b'\xbaNr\x00\x00\x00\x00N', # slice(None, <R>)
442+
b'\xbaNNr\x00\x00\x00\x00', # slice(None, None, <R>)
443+
]:
444+
with self.subTest(data=data):
445+
self.assertRaises(ValueError, marshal.loads, data)
446+
320447
def test_exact_type_match(self):
321448
# Former bug:
322449
# >>> class Int(int): pass
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Forbid :mod:`marshalling <marshal>` recursive code objects, :class:`slice`
2+
and :class:`frozendict` objects which cannot be correctly unmarshalled.

Python/marshal.c

Lines changed: 53 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,6 @@ static int
382382
w_ref(PyObject *v, char *flag, WFILE *p)
383383
{
384384
_Py_hashtable_entry_t *entry;
385-
int w;
386385

387386
if (p->version < 3 || p->hashtable == NULL)
388387
return 0; /* not writing object references */
@@ -399,20 +398,28 @@ w_ref(PyObject *v, char *flag, WFILE *p)
399398
entry = _Py_hashtable_get_entry(p->hashtable, v);
400399
if (entry != NULL) {
401400
/* write the reference index to the stream */
402-
w = (int)(uintptr_t)entry->value;
401+
uintptr_t w = (uintptr_t)entry->value;
402+
if (w & 0x80000000LU) {
403+
PyErr_Format(PyExc_ValueError, "cannot marshal recursion %T objects", v);
404+
goto err;
405+
}
403406
/* we don't store "long" indices in the dict */
404-
assert(0 <= w && w <= 0x7fffffff);
407+
assert(w <= 0x7fffffff);
405408
w_byte(TYPE_REF, p);
406-
w_long(w, p);
409+
w_long((int)w, p);
407410
return 1;
408411
} else {
409-
size_t s = p->hashtable->nentries;
412+
size_t w = p->hashtable->nentries;
410413
/* we don't support long indices */
411-
if (s >= 0x7fffffff) {
414+
if (w >= 0x7fffffff) {
412415
PyErr_SetString(PyExc_ValueError, "too many objects");
413416
goto err;
414417
}
415-
w = (int)s;
418+
// Corresponding code should call w_complete() after
419+
// writing the object.
420+
if (PyCode_Check(v) || PySlice_Check(v) || PyFrozenDict_CheckExact(v)) {
421+
w |= 0x80000000LU;
422+
}
416423
if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
417424
(void *)(uintptr_t)w) < 0) {
418425
Py_DECREF(v);
@@ -426,6 +433,27 @@ w_ref(PyObject *v, char *flag, WFILE *p)
426433
return 1;
427434
}
428435

436+
static void
437+
w_complete(PyObject *v, WFILE *p)
438+
{
439+
if (p->version < 3 || p->hashtable == NULL) {
440+
return;
441+
}
442+
if (_PyObject_IsUniquelyReferenced(v)) {
443+
return;
444+
}
445+
446+
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(p->hashtable, v);
447+
if (entry == NULL) {
448+
return;
449+
}
450+
assert(entry != NULL);
451+
uintptr_t w = (uintptr_t)entry->value;
452+
assert(w & 0x80000000LU);
453+
w &= ~0x80000000LU;
454+
entry->value = (void *)(uintptr_t)w;
455+
}
456+
429457
static void
430458
w_complex_object(PyObject *v, char flag, WFILE *p);
431459

@@ -599,6 +627,9 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
599627
w_object(value, p);
600628
}
601629
w_object((PyObject *)NULL, p);
630+
if (PyFrozenDict_CheckExact(v)) {
631+
w_complete(v, p);
632+
}
602633
}
603634
else if (PyAnySet_CheckExact(v)) {
604635
PyObject *value;
@@ -684,6 +715,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
684715
w_object(co->co_linetable, p);
685716
w_object(co->co_exceptiontable, p);
686717
Py_DECREF(co_code);
718+
w_complete(v, p);
687719
}
688720
else if (PyObject_CheckBuffer(v)) {
689721
/* Write unknown bytes-like objects as a bytes object */
@@ -709,6 +741,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
709741
w_object(slice->start, p);
710742
w_object(slice->stop, p);
711743
w_object(slice->step, p);
744+
w_complete(v, p);
712745
}
713746
else {
714747
W_TYPE(TYPE_UNKNOWN, p);
@@ -1433,9 +1466,19 @@ r_object(RFILE *p)
14331466
case TYPE_DICT:
14341467
case TYPE_FROZENDICT:
14351468
v = PyDict_New();
1436-
R_REF(v);
1437-
if (v == NULL)
1469+
if (v == NULL) {
14381470
break;
1471+
}
1472+
if (type == TYPE_DICT) {
1473+
R_REF(v);
1474+
}
1475+
else {
1476+
idx = r_ref_reserve(flag, p);
1477+
if (idx < 0) {
1478+
Py_CLEAR(v);
1479+
break;
1480+
}
1481+
}
14391482
for (;;) {
14401483
PyObject *key, *val;
14411484
key = r_object(p);
@@ -1458,13 +1501,7 @@ r_object(RFILE *p)
14581501
Py_CLEAR(v);
14591502
}
14601503
if (type == TYPE_FROZENDICT && v != NULL) {
1461-
PyObject *frozendict = PyFrozenDict_New(v);
1462-
if (frozendict != NULL) {
1463-
Py_SETREF(v, frozendict);
1464-
}
1465-
else {
1466-
Py_CLEAR(v);
1467-
}
1504+
Py_SETREF(v, PyFrozenDict_New(v));
14681505
}
14691506
retval = v;
14701507
break;

0 commit comments

Comments
 (0)