Skip to content

Commit b0274f2

Browse files
authored
closes bpo-34056: Always return bytes from _HackedGetData.get_data(). (GH-8130)
* Always return bytes from _HackedGetData.get_data(). Ensure the imp.load_source shim always returns bytes by reopening the file in binary mode if needed, because hash-based pycs have to receive the source code as bytes. It's tempting to change imp.get_suffixes() to always return 'rb' as the mode, but that breaks some stdlib tests and likely third-party code, too.
1 parent e25399b commit b0274f2

File tree

3 files changed

+24
-7
lines changed

3 files changed

+24
-7
lines changed

Lib/imp.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -142,17 +142,16 @@ def __init__(self, fullname, path, file=None):
142142
def get_data(self, path):
143143
"""Gross hack to contort loader to deal w/ load_*()'s bad API."""
144144
if self.file and path == self.path:
145+
# The contract of get_data() requires us to return bytes. Reopen the
146+
# file in binary mode if needed.
145147
if not self.file.closed:
146148
file = self.file
147-
else:
148-
self.file = file = open(self.path, 'r')
149+
if 'b' not in file.mode:
150+
file.close()
151+
if self.file.closed:
152+
self.file = file = open(self.path, 'rb')
149153

150154
with file:
151-
# Technically should be returning bytes, but
152-
# SourceLoader.get_code() just passed what is returned to
153-
# compile() which can handle str. And converting to bytes would
154-
# require figuring out the encoding to decode to and
155-
# tokenize.detect_encoding() only accepts bytes.
156155
return file.read()
157156
else:
158157
return super().get_data(path)

Lib/test/test_imp.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import importlib.util
33
import os
44
import os.path
5+
import py_compile
56
import sys
67
from test import support
78
from test.support import script_helper
@@ -350,6 +351,20 @@ def test_pyc_invalidation_mode_from_cmdline(self):
350351
res = script_helper.assert_python_ok(*args)
351352
self.assertEqual(res.out.strip().decode('utf-8'), expected)
352353

354+
def test_find_and_load_checked_pyc(self):
355+
# issue 34056
356+
with support.temp_cwd():
357+
with open('mymod.py', 'wb') as fp:
358+
fp.write(b'x = 42\n')
359+
py_compile.compile(
360+
'mymod.py',
361+
doraise=True,
362+
invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
363+
)
364+
file, path, description = imp.find_module('mymod', path=['.'])
365+
mod = imp.load_module('mymod', file, path, description)
366+
self.assertEqual(mod.x, 42)
367+
353368

354369
class ReloadTests(unittest.TestCase):
355370

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Ensure the loader shim created by ``imp.load_module`` always returns bytes
2+
from its ``get_data()`` function. This fixes using ``imp.load_module`` with
3+
:pep:`552` hash-based pycs.

0 commit comments

Comments
 (0)