From 35a1d5854cec5a8f92aafcf1e56aeebd77cf318f Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Wed, 27 May 2026 17:28:08 +0800 Subject: [PATCH] gh-150499: http.server: drain unread request body on persistent connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to gh-150499 (paired with #150500 which lands the ยง3.3.3 rules). Wrap rfile with a small byte-counting reader for the duration of the request and, after the handler returns, drain any unread declared body up to a 1 MiB cap (or close the connection if the remainder is larger). Without this, the next iteration of the keep-alive loop parses the leftover body as a request line, per RFC 7230 section 6.3. The wrapper proxies read, read1, readline, readinto, and readinto1 and falls back to __getattr__ for everything else, so existing handlers (including the ServerHandler chain in wsgiref.simple_server, which passes self.rfile directly to WSGI applications) continue to work unchanged. Add RFC7230BodyDrainTestCase asserting that the leftover body is not parsed as the next request line on a persistent connection. Signed-off-by: tonghuaroot --- Lib/http/server.py | 82 ++++++++++++++++++- Lib/test/test_httpservers.py | 58 +++++++++++++ ...-05-27-09-15-22.gh-issue-150499.dBoDr4.rst | 5 ++ 3 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2026-05-27-09-15-22.gh-issue-150499.dBoDr4.rst diff --git a/Lib/http/server.py b/Lib/http/server.py index ebc85052aecb90..d509c006102aed 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -170,6 +170,46 @@ class ThreadingHTTPSServer(socketserver.ThreadingMixIn, HTTPSServer): daemon_threads = True +class _ReadCountingReader: + # Proxy around the underlying rfile that tracks the number of bytes + # consumed, so handle_one_request can drain or close per RFC 7230 + # section 6.3. + + def __init__(self, stream): + self._stream = stream + self.bytes_read = 0 + + def read(self, *args, **kwargs): + data = self._stream.read(*args, **kwargs) + self.bytes_read += len(data) + return data + + def read1(self, *args, **kwargs): + data = self._stream.read1(*args, **kwargs) + self.bytes_read += len(data) + return data + + def readline(self, *args, **kwargs): + data = self._stream.readline(*args, **kwargs) + self.bytes_read += len(data) + return data + + def readinto(self, b): + n = self._stream.readinto(b) + if n: + self.bytes_read += n + return n + + def readinto1(self, b): + n = self._stream.readinto1(b) + if n: + self.bytes_read += n + return n + + def __getattr__(self, name): + return getattr(self._stream, name) + + class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): """HTTP request handler base class. @@ -426,6 +466,11 @@ def handle_expect_100(self): self.end_headers() return True + # Maximum number of bytes drained from an unread request body after + # dispatch to honour RFC 7230 section 6.3; over this, the connection + # is closed instead. + _MAX_BODY_DRAIN = 1 << 20 + def handle_one_request(self): """Handle a single HTTP request. @@ -434,8 +479,10 @@ def handle_one_request(self): commands such as GET and POST. """ + raw_rfile = self.rfile + wrapped = False try: - self.raw_requestline = self.rfile.readline(65537) + self.raw_requestline = raw_rfile.readline(65537) if len(self.raw_requestline) > 65536: self.requestline = '' self.request_version = '' @@ -445,9 +492,13 @@ def handle_one_request(self): if not self.raw_requestline: self.close_connection = True return + # Wrap rfile to track body consumption for RFC 7230 section 6.3. + self.rfile = _ReadCountingReader(raw_rfile) + wrapped = True if not self.parse_request(): # An error code has been sent, just exit return + body_bytes_start = self.rfile.bytes_read mname = 'do_' + self.command if not hasattr(self, mname): self.send_error( @@ -457,11 +508,40 @@ def handle_one_request(self): method = getattr(self, mname) method() self.wfile.flush() #actually send the response if not already done. + self._drain_request_body(body_bytes_start) except TimeoutError as e: #a read or a write timed out. Discard this connection self.log_error("Request timed out: %r", e) self.close_connection = True return + finally: + if wrapped: + self.rfile = raw_rfile + + def _drain_request_body(self, body_bytes_start): + # Drain any unread declared request body, or close the connection + # if the remainder exceeds the drain cap. Required by RFC 7230 + # section 6.3 for persistent connections. + if self.close_connection: + return + cl = self.headers.get('Content-Length') + if not cl or not cl.isdigit(): + return + declared = int(cl) + consumed = self.rfile.bytes_read - body_bytes_start + remaining = declared - consumed + if remaining <= 0: + return + if remaining > self._MAX_BODY_DRAIN: + self.close_connection = True + return + try: + drained = self.rfile.read(remaining) + except OSError: + self.close_connection = True + return + if len(drained) < remaining: + self.close_connection = True def handle(self): """Handle multiple requests if necessary.""" diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index d4ae032610a91e..f5d17c49634f5e 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -364,6 +364,64 @@ def test_head_via_send_error(self): self.assertEqual(b'', data) +class RFC7230BodyDrainTestCase(BaseTestCase): + """Exercise the post-dispatch body drain added for RFC 7230 section 6.3.""" + + class request_handler(NoLogRequestHandler, BaseHTTPRequestHandler): + protocol_version = 'HTTP/1.1' + default_request_version = 'HTTP/1.1' + + def do_GET(self): + # Deliberately does not read the body, to exercise the + # post-dispatch body drain. + out = b'GET ok\n' + self.send_response(HTTPStatus.OK) + self.send_header('Content-Type', 'text/plain') + self.send_header('Content-Length', str(len(out))) + self.end_headers() + self.wfile.write(out) + + def _send_raw(self, payload, timeout=2): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(timeout) + sock.connect((self.HOST, self.PORT)) + try: + sock.sendall(payload) + data = b'' + try: + while True: + chunk = sock.recv(4096) + if not chunk: + break + data += chunk + except TimeoutError: + pass + finally: + sock.close() + return data + + def test_body_drained_on_persistent_connection(self): + # The leftover body of a GET request must not be parsed as the + # next request line on a keep-alive connection. + body_filler = b'A' * 100 + smuggled = b'GET /pwn HTTP/1.1\r\nHost: 127.0.0.1\r\n\r\n' + data = self._send_raw( + b'GET / HTTP/1.1\r\n' + b'Host: 127.0.0.1\r\n' + b'Content-Length: 100\r\n' + b'\r\n' + + body_filler + + smuggled + ) + # The leftover body must not be parsed as a malformed request + # line; we should never see "Unsupported method ('AAAA...')". + self.assertNotIn(b"Unsupported method ('AAA", data) + # The smuggled GET /pwn is a well-formed request that arrives + # after the body has been drained, so the server should reply + # to it normally as the second request on the connection. + self.assertEqual(data.count(b'HTTP/1.1 200'), 2) + + class HTTP09ServerTestCase(BaseTestCase): class request_handler(NoLogRequestHandler, BaseHTTPRequestHandler): diff --git a/Misc/NEWS.d/next/Library/2026-05-27-09-15-22.gh-issue-150499.dBoDr4.rst b/Misc/NEWS.d/next/Library/2026-05-27-09-15-22.gh-issue-150499.dBoDr4.rst new file mode 100644 index 00000000000000..32350fb6558496 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-27-09-15-22.gh-issue-150499.dBoDr4.rst @@ -0,0 +1,5 @@ +:mod:`http.server` now drains any unread declared request body up to +1 MiB after dispatch on persistent connections, and closes the connection +when the remainder exceeds that cap, per :rfc:`7230#section-6.3`. This +prevents the next iteration of the keep-alive loop from parsing leftover +body bytes as a request line.