Skip to content

Commit 3b81c13

Browse files
[3.9] gh-100001: Omit control characters in http.server stderr logs. (GH-100002) (#100032)
* gh-100001: Omit control characters in http.server stderr logs. (GH-100002) Replace control characters in http.server.BaseHTTPRequestHandler.log_message with an escaped \xHH sequence to avoid causing problems for the terminal the output is printed to. (cherry picked from commit d8ab0a4) Co-authored-by: Gregory P. Smith <[email protected]> * also escape \s (backport of PR #100038). * add versionadded and remove extra 'to' Co-authored-by: Gregory P. Smith <[email protected]>
1 parent 7b98207 commit 3b81c13

File tree

4 files changed

+46
-2
lines changed

4 files changed

+46
-2
lines changed

Doc/library/http.server.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,3 +499,12 @@ Security Considerations
499499
:class:`SimpleHTTPRequestHandler` will follow symbolic links when handling
500500
requests; this makes it possible for files outside of the specified directory
501501
to be served.
502+
503+
Earlier versions of Python did not scrub control characters from the
504+
log messages emitted to stderr from ``python -m http.server`` or the
505+
default :class:`BaseHTTPRequestHandler` ``.log_message``
506+
implementation. This could allow remote clients connecting to your
507+
server to send nefarious control codes to your terminal.
508+
509+
.. versionadded:: 3.9.16
510+
Control characters are now scrubbed from log messages.

Lib/http/server.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@
9393
import html
9494
import http.client
9595
import io
96+
import itertools
9697
import mimetypes
9798
import os
9899
import posixpath
@@ -563,6 +564,11 @@ def log_error(self, format, *args):
563564

564565
self.log_message(format, *args)
565566

567+
# https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes
568+
_control_char_table = str.maketrans(
569+
{c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f,0xa0))})
570+
_control_char_table[ord('\\')] = r'\\'
571+
566572
def log_message(self, format, *args):
567573
"""Log an arbitrary message.
568574
@@ -578,12 +584,16 @@ def log_message(self, format, *args):
578584
The client ip and current date/time are prefixed to
579585
every message.
580586
587+
Unicode control characters are replaced with escaped hex
588+
before writing the output to stderr.
589+
581590
"""
582591

592+
message = format % args
583593
sys.stderr.write("%s - - [%s] %s\n" %
584594
(self.address_string(),
585595
self.log_date_time_string(),
586-
format%args))
596+
message.translate(self._control_char_table)))
587597

588598
def version_string(self):
589599
"""Return the server software version string."""

Lib/test/test_httpservers.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import datetime
2727
import threading
2828
from unittest import mock
29-
from io import BytesIO
29+
from io import BytesIO, StringIO
3030

3131
import unittest
3232
from test import support
@@ -982,6 +982,25 @@ def verify_http_server_response(self, response):
982982
match = self.HTTPResponseMatch.search(response)
983983
self.assertIsNotNone(match)
984984

985+
def test_unprintable_not_logged(self):
986+
# We call the method from the class directly as our Socketless
987+
# Handler subclass overrode it... nice for everything BUT this test.
988+
self.handler.client_address = ('127.0.0.1', 1337)
989+
log_message = BaseHTTPRequestHandler.log_message
990+
with mock.patch.object(sys, 'stderr', StringIO()) as fake_stderr:
991+
log_message(self.handler, '/foo')
992+
log_message(self.handler, '/\033bar\000\033')
993+
log_message(self.handler, '/spam %s.', 'a')
994+
log_message(self.handler, '/spam %s.', '\033\x7f\x9f\xa0beans')
995+
stderr = fake_stderr.getvalue()
996+
self.assertNotIn('\033', stderr) # non-printable chars are caught.
997+
self.assertNotIn('\000', stderr) # non-printable chars are caught.
998+
lines = stderr.splitlines()
999+
self.assertIn('/foo', lines[0])
1000+
self.assertIn(r'/\x1bbar\x00\x1b', lines[1])
1001+
self.assertIn('/spam a.', lines[2])
1002+
self.assertIn('/spam \\x1b\\x7f\\x9f\xa0beans.', lines[3])
1003+
9851004
def test_http_1_1(self):
9861005
result = self.send_typical_request(b'GET / HTTP/1.1\r\n\r\n')
9871006
self.verify_http_server_response(result[0])
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
``python -m http.server`` no longer allows terminal control characters sent
2+
within a garbage request to be printed to the stderr server log.
3+
4+
This is done by changing the :mod:`http.server` :class:`BaseHTTPRequestHandler`
5+
``.log_message`` method to replace control characters with a ``\xHH`` hex escape
6+
before printing.

0 commit comments

Comments
 (0)