Skip to content

Commit 50dd1f7

Browse files
committed
Issue #26717: Stop encoding Latin-1-ized WSGI paths with UTF-8
Patch by Anthony Sottile.
1 parent 06172e7 commit 50dd1f7

File tree

4 files changed

+29
-1
lines changed

4 files changed

+29
-1
lines changed

Lib/test/test_wsgiref.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from unittest import mock
12
from unittest import TestCase
23
from wsgiref.util import setup_testing_defaults
34
from wsgiref.headers import Headers
@@ -221,6 +222,29 @@ def app(e, s):
221222
b"data",
222223
out)
223224

225+
def test_cp1252_url(self):
226+
def app(e, s):
227+
s("200 OK", [
228+
("Content-Type", "text/plain"),
229+
("Date", "Wed, 24 Dec 2008 13:29:32 GMT"),
230+
])
231+
# PEP3333 says environ variables are decoded as latin1.
232+
# Encode as latin1 to get original bytes
233+
return [e["PATH_INFO"].encode("latin1")]
234+
235+
out, err = run_amock(
236+
validator(app), data=b"GET /\x80%80 HTTP/1.0")
237+
self.assertEqual(
238+
[
239+
b"HTTP/1.0 200 OK",
240+
mock.ANY,
241+
b"Content-Type: text/plain",
242+
b"Date: Wed, 24 Dec 2008 13:29:32 GMT",
243+
b"",
244+
b"/\x80\x80",
245+
],
246+
out.splitlines())
247+
224248

225249
class UtilityTests(TestCase):
226250

Lib/wsgiref/simple_server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def get_environ(self):
8282
else:
8383
path,query = self.path,''
8484

85-
env['PATH_INFO'] = urllib.parse.unquote_to_bytes(path).decode('iso-8859-1')
85+
env['PATH_INFO'] = urllib.parse.unquote(path, 'iso-8859-1')
8686
env['QUERY_STRING'] = query
8787

8888
host = self.address_string()

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1376,6 +1376,7 @@ Nir Soffer
13761376
Paul Sokolovsky
13771377
Evgeny Sologubov
13781378
Cody Somerville
1379+
Anthony Sottile
13791380
Edoardo Spadolini
13801381
Geoffrey Spear
13811382
Clay Spence

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ Core and Builtins
107107
Library
108108
-------
109109

110+
- Issue #26717: Stop encoding Latin-1-ized WSGI paths with UTF-8. Patch by
111+
Anthony Sottile.
112+
110113
- Issue #26735: Fix :func:`os.urandom` on Solaris 11.3 and newer when reading
111114
more than 1,024 bytes: call ``getrandom()`` multiple times with a limit of
112115
1024 bytes per call.

0 commit comments

Comments
 (0)