diff --git a/CHANGES.rst b/CHANGES.rst index cd5dc19d847..2e30443b567 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,6 +10,26 @@ .. towncrier release notes start +3.13.5 (2026-03-31) +=================== + +Bug fixes +--------- + +- Skipped the duplicate singleton header check in lax mode (the default for response + parsing). In strict mode (request parsing, or ``-X dev``), all RFC 9110 singletons + are still enforced -- by :user:`bdraco`. + + + *Related issues and pull requests on GitHub:* + :issue:`12302`. + + + + +---- + + 3.13.4 (2026-03-28) =================== diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index 2ceccf41f2c..9a444be66fc 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -71,8 +71,11 @@ cdef object StreamReader = _StreamReader cdef object DeflateBuffer = _DeflateBuffer cdef bytes EMPTY_BYTES = b"" -# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6 -cdef tuple SINGLETON_HEADERS = ( +# RFC 9110 singleton headers — duplicates are rejected in strict mode. +# In lax mode (response parser default), the check is skipped entirely +# since real-world servers (e.g. Google APIs, Werkzeug) commonly send +# duplicate headers like Content-Type or Server. +cdef frozenset SINGLETON_HEADERS = frozenset({ hdrs.CONTENT_LENGTH, hdrs.CONTENT_LOCATION, hdrs.CONTENT_RANGE, @@ -83,7 +86,7 @@ cdef tuple SINGLETON_HEADERS = ( hdrs.SERVER, hdrs.TRANSFER_ENCODING, hdrs.USER_AGENT, -) +}) cdef inline object extend(object buf, const char* at, size_t length): cdef Py_ssize_t s @@ -304,6 +307,7 @@ cdef class HttpParser: size_t _max_headers bint _response_with_body bint _read_until_eof + bint _lax bint _started object _url @@ -311,6 +315,7 @@ cdef class HttpParser: str _path str _reason list _headers + set _seen_singletons list _raw_headers bint _upgraded list _messages @@ -377,6 +382,8 @@ cdef class HttpParser: self._upgraded = False self._auto_decompress = auto_decompress self._content_encoding = None + self._lax = False + self._seen_singletons = set() self._csettings.on_url = cb_on_url self._csettings.on_status = cb_on_status @@ -405,6 +412,10 @@ cdef class HttpParser: if "\x00" in value: raise InvalidHeader(self._raw_value) + if not self._lax and name in SINGLETON_HEADERS: + if name in self._seen_singletons: + raise BadHttpMessage(f"Duplicate '{name}' header found.") + self._seen_singletons.add(name) self._headers.append((name, value)) if len(self._headers) > self._max_headers: raise BadHttpMessage("Too many headers received") @@ -444,14 +455,6 @@ cdef class HttpParser: raw_headers = tuple(self._raw_headers) headers = CIMultiDictProxy(CIMultiDict(self._headers)) - # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf - bad_hdr = next( - (h for h in SINGLETON_HEADERS if len(headers.getall(h, ())) > 1), - None, - ) - if bad_hdr is not None: - raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.") - if self._cparser.type == cparser.HTTP_REQUEST: h_upg = headers.get("upgrade", "") allowed = upgrade and h_upg.isascii() and h_upg.lower() in ALLOWED_UPGRADES @@ -689,6 +692,7 @@ cdef class HttpResponseParser(HttpParser): cparser.llhttp_set_lenient_headers(self._cparser, 1) cparser.llhttp_set_lenient_optional_cr_before_lf(self._cparser, 1) cparser.llhttp_set_lenient_spaces_after_chunk_size(self._cparser, 1) + self._lax = True cdef object _on_status_complete(self): if self._buf: @@ -702,6 +706,7 @@ cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1: pyparser._started = True pyparser._headers = [] + pyparser._seen_singletons = set() pyparser._raw_headers = [] PyByteArray_Resize(pyparser._buf, 0) pyparser._path = None diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index d0aee4d75c4..207cf8da39e 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -75,6 +75,26 @@ DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII) HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+") +# RFC 9110 singleton headers — duplicates are rejected in strict mode. +# In lax mode (response parser default), the check is skipped entirely +# since real-world servers (e.g. Google APIs, Werkzeug) commonly send +# duplicate headers like Content-Type or Server. +# Lowercased for case-insensitive matching against wire names. +SINGLETON_HEADERS: Final[frozenset[str]] = frozenset( + { + "content-length", + "content-location", + "content-range", + "content-type", + "etag", + "host", + "max-forwards", + "server", + "transfer-encoding", + "user-agent", + } +) + class RawRequestMessage(NamedTuple): method: str @@ -194,6 +214,8 @@ def parse_headers( elif _FIELD_VALUE_FORBIDDEN_CTL_RE.search(value): raise InvalidHeader(bvalue) + if not self._lax and name in headers and name.lower() in SINGLETON_HEADERS: + raise BadHttpMessage(f"Duplicate '{name}' header found.") headers.add(name, value) raw_headers.append((bname, bvalue)) @@ -502,24 +524,6 @@ def parse_headers( upgrade = False chunked = False - # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6 - # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf - singletons = ( - hdrs.CONTENT_LENGTH, - hdrs.CONTENT_LOCATION, - hdrs.CONTENT_RANGE, - hdrs.CONTENT_TYPE, - hdrs.ETAG, - hdrs.HOST, - hdrs.MAX_FORWARDS, - hdrs.SERVER, - hdrs.TRANSFER_ENCODING, - hdrs.USER_AGENT, - ) - bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None) - if bad_hdr is not None: - raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.") - # keep-alive and protocol switching # RFC 9110 section 7.6.1 defines Connection as a comma-separated list. conn_values = headers.getall(hdrs.CONNECTION, ()) diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 6e877aaacd2..2c593a7589c 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -281,32 +281,76 @@ def test_content_length_transfer_encoding(parser: HttpRequestParser) -> None: "hdr", ( "Content-Length", + "Host", + "Transfer-Encoding", + ), +) +def test_duplicate_singleton_header_rejected( + parser: HttpRequestParser, hdr: str +) -> None: + val1, val2 = ("1", "2") if hdr == "Content-Length" else ("value1", "value2") + text = ( + f"GET /test HTTP/1.1\r\n" + f"Host: example.com\r\n" + f"{hdr}: {val1}\r\n" + f"{hdr}: {val2}\r\n" + "\r\n" + ).encode() + with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"): + parser.feed_data(text) + + +@pytest.mark.parametrize( + "hdr", + ( "Content-Location", "Content-Range", "Content-Type", "ETag", - "Host", "Max-Forwards", "Server", - "Transfer-Encoding", "User-Agent", ), ) -def test_duplicate_singleton_header_rejected( +def test_duplicate_non_security_singleton_header_rejected_strict( parser: HttpRequestParser, hdr: str ) -> None: - val1, val2 = ("1", "2") if hdr == "Content-Length" else ("value1", "value2") + """Non-security singletons are rejected in strict mode (requests).""" text = ( f"GET /test HTTP/1.1\r\n" f"Host: example.com\r\n" - f"{hdr}: {val1}\r\n" - f"{hdr}: {val2}\r\n" - f"\r\n" + f"{hdr}: value1\r\n" + f"{hdr}: value2\r\n" + "\r\n" ).encode() with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"): parser.feed_data(text) +@pytest.mark.parametrize( + "hdr", + ( + # Content-Length is excluded because llhttp rejects duplicates + # at the C level before our singleton check runs. + "Content-Location", + "Content-Range", + "Content-Type", + "ETag", + "Max-Forwards", + "Server", + "Transfer-Encoding", + "User-Agent", + ), +) +def test_duplicate_singleton_header_accepted_in_lax_mode( + response: HttpResponseParser, hdr: str +) -> None: + """All singleton duplicates are accepted in lax mode (response parser default).""" + text = (f"HTTP/1.1 200 OK\r\n{hdr}: value1\r\n{hdr}: value2\r\n\r\n").encode() + messages, upgrade, tail = response.feed_data(text) + assert len(messages) == 1 + + def test_duplicate_host_header_rejected(parser: HttpRequestParser) -> None: text = ( b"GET /admin HTTP/1.1\r\n" @@ -318,6 +362,45 @@ def test_duplicate_host_header_rejected(parser: HttpRequestParser) -> None: parser.feed_data(text) +@pytest.mark.parametrize( + ("hdr1", "hdr2"), + ( + ("content-length", "Content-Length"), + ("Content-Length", "content-length"), + ("transfer-encoding", "Transfer-Encoding"), + ("Transfer-Encoding", "transfer-encoding"), + ), +) +def test_duplicate_singleton_header_different_casing_rejected( + parser: HttpRequestParser, hdr1: str, hdr2: str +) -> None: + """Singleton check must be case-insensitive per RFC 9110.""" + val1, val2 = ("1", "2") if "content-length" in hdr1.lower() else ("v1", "v2") + text = ( + f"GET /test HTTP/1.1\r\n" + f"Host: example.com\r\n" + f"{hdr1}: {val1}\r\n" + f"{hdr2}: {val2}\r\n" + "\r\n" + ).encode() + with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"): + parser.feed_data(text) + + +def test_duplicate_host_header_different_casing_rejected( + parser: HttpRequestParser, +) -> None: + """Duplicate Host with different casing must also be rejected.""" + text = ( + b"GET /test HTTP/1.1\r\n" + b"host: evil.example\r\n" + b"Host: good.example\r\n" + b"\r\n" + ) + with pytest.raises(http_exceptions.BadHttpMessage, match="Duplicate"): + parser.feed_data(text) + + def test_bad_chunked(parser: HttpRequestParser) -> None: """Test that invalid chunked encoding doesn't allow content-length to be used.""" text = (