From 03794ce9a58b1f33751c88d7d876dfbf27645c56 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 26 Apr 2026 19:31:25 +0100 Subject: [PATCH 1/2] Use `XML_SetHashSalt16Bytes` from libExpat when possible --- Include/pyexpat.h | 3 +++ .../2026-04-26-19-30-45.gh-issue-149018.a9SqWb.rst | 3 +++ Modules/_elementtree.c | 8 ++++++-- Modules/pyexpat.c | 11 ++++++++++- 4 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2026-04-26-19-30-45.gh-issue-149018.a9SqWb.rst diff --git a/Include/pyexpat.h b/Include/pyexpat.h index f523f8bb273983..a676e16a7a457e 100644 --- a/Include/pyexpat.h +++ b/Include/pyexpat.h @@ -62,6 +62,9 @@ struct PyExpat_CAPI XML_Parser parser, unsigned long long activationThresholdBytes); XML_Bool (*SetBillionLaughsAttackProtectionMaximumAmplification)( XML_Parser parser, float maxAmplificationFactor); + /* might be NULL for expat < 2.8.0 */ + XML_Bool (*SetHashSalt16Bytes)( + XML_Parser parser, const uint8_t entropy[16]); /* always add new stuff to the end! */ }; diff --git a/Misc/NEWS.d/next/Security/2026-04-26-19-30-45.gh-issue-149018.a9SqWb.rst b/Misc/NEWS.d/next/Security/2026-04-26-19-30-45.gh-issue-149018.a9SqWb.rst new file mode 100644 index 00000000000000..d1b5b368684e6a --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-04-26-19-30-45.gh-issue-149018.a9SqWb.rst @@ -0,0 +1,3 @@ +Improved protection against XML hash-flooding attacks in +:mod:`xml.parsers.expat` and :mod:`xml.etree.ElementTree` when Python is +compiled with libExpat 2.8.0 or later. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index cbd1e026df2722..b2d4b982602c58 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -3735,8 +3735,12 @@ _elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target, PyErr_NoMemory(); return -1; } - /* expat < 2.1.0 has no XML_SetHashSalt() */ - if (EXPAT(st, SetHashSalt) != NULL) { + // Prefer 16-byte entropy, only expat >= 2.8.0. See gh-149018 + if (EXPAT(st, SetHashSalt16Bytes) != NULL) { + EXPAT(st, SetHashSalt16Bytes)(self->parser, + (const uint8_t *)_Py_HashSecret.uc); + } + else if (EXPAT(st, SetHashSalt) != NULL) { EXPAT(st, SetHashSalt)(self->parser, (unsigned long)_Py_HashSecret.expat.hashsalt); } diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 0f0afe17513ef1..1df433e64bc096 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1533,7 +1533,11 @@ newxmlparseobject(pyexpat_state *state, const char *encoding, Py_DECREF(self); return NULL; } -#if XML_COMBINED_VERSION >= 20100 +#if XML_COMBINED_VERSION >= 20800 + /* This feature was added upstream in libexpat 2.8.0. */ + XML_SetHashSalt16Bytes(self->itself, + (const uint8_t *)_Py_HashSecret.uc); +#elif XML_COMBINED_VERSION >= 20100 /* This feature was added upstream in libexpat 2.1.0. */ XML_SetHashSalt(self->itself, (unsigned long)_Py_HashSecret.expat.hashsalt); @@ -2427,6 +2431,11 @@ pyexpat_exec(PyObject *mod) #else capi->SetHashSalt = NULL; #endif +#if XML_COMBINED_VERSION >= 20800 + capi->SetHashSalt16Bytes = XML_SetHashSalt16Bytes; +#else + capi->SetHashSalt16Bytes = NULL; +#endif #if XML_COMBINED_VERSION >= 20600 capi->SetReparseDeferralEnabled = XML_SetReparseDeferralEnabled; #else From ccb8d2f7df9534e49a43554193d7f5f4d993189c Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 26 Apr 2026 19:42:01 +0100 Subject: [PATCH 2/2] Add `_Py_HashSecret_t.expat.hashsalt16` instead --- Include/internal/pycore_pyhash.h | 8 +++++--- Modules/_elementtree.c | 2 +- Modules/pyexpat.c | 3 +-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_pyhash.h b/Include/internal/pycore_pyhash.h index 84cb72fa6fd1b2..3056dc44cc0f1b 100644 --- a/Include/internal/pycore_pyhash.h +++ b/Include/internal/pycore_pyhash.h @@ -27,14 +27,14 @@ _Py_HashPointerRaw(const void *ptr) * pppppppp ssssssss ........ fnv -- two Py_hash_t * k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t * ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t - * ........ ........ eeeeeeee pyexpat XML hash salt + * eeeeeeee eeeeeeee eeeeeeee pyexpat XML hash salt * * memory layout on 32 bit systems * cccccccc cccccccc cccccccc uc * ppppssss ........ ........ fnv -- two Py_hash_t * k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t (*) * ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t - * ........ ........ eeee.... pyexpat XML hash salt + * eeeeeeee eeeeeeee eeee.... pyexpat XML hash salt * * (*) The siphash member may not be available on 32 bit platforms without * an unsigned int64 data type. @@ -58,7 +58,9 @@ typedef union { Py_hash_t suffix; } djbx33a; struct { - unsigned char padding[16]; + /* 16 bytes for XML_SetHashSalt16Bytes */ + uint8_t hashsalt16[16]; + /* 4/8 bytes for legacy XML_SetHashSalt */ Py_hash_t hashsalt; } expat; } _Py_HashSecret_t; diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index b2d4b982602c58..9e794be5c109ba 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -3738,7 +3738,7 @@ _elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target, // Prefer 16-byte entropy, only expat >= 2.8.0. See gh-149018 if (EXPAT(st, SetHashSalt16Bytes) != NULL) { EXPAT(st, SetHashSalt16Bytes)(self->parser, - (const uint8_t *)_Py_HashSecret.uc); + _Py_HashSecret.expat.hashsalt16); } else if (EXPAT(st, SetHashSalt) != NULL) { EXPAT(st, SetHashSalt)(self->parser, diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 1df433e64bc096..78efbef679024f 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1535,8 +1535,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding, } #if XML_COMBINED_VERSION >= 20800 /* This feature was added upstream in libexpat 2.8.0. */ - XML_SetHashSalt16Bytes(self->itself, - (const uint8_t *)_Py_HashSecret.uc); + XML_SetHashSalt16Bytes(self->itself, _Py_HashSecret.expat.hashsalt16); #elif XML_COMBINED_VERSION >= 20100 /* This feature was added upstream in libexpat 2.1.0. */ XML_SetHashSalt(self->itself,