Skip to content

Commit 79cf20e

Browse files
bpo-21736: Set __file__ on frozen stdlib modules. (gh-28656)
Currently frozen modules do not have __file__ set. In their spec, origin is set to "frozen" and they are marked as not having a location. (Similarly, for frozen packages __path__ is set to an empty list.) However, for frozen stdlib modules we are able to extrapolate __file__ as long as we can determine the stdlib directory at runtime. (We now do so since gh-28586.) Having __file__ set is helpful for a number of reasons. Likewise, having a non-empty __path__ means we can import submodules of a frozen package from the filesystem (e.g. we could partially freeze the encodings module). This change sets __file__ (and adds to __path__) for frozen stdlib modules. It uses sys._stdlib_dir (from gh-28586) and the frozen module alias information (from gh-28655). All that work is done in FrozenImporter (in Lib/importlib/_bootstrap.py). Also, if a frozen module is imported before importlib is bootstrapped (during interpreter initialization) then we fix up that module and its spec during the importlib bootstrapping step (i.e. importlib._bootstrap._setup()) to match what gets set by FrozenImporter, including setting the file info (if the stdlib dir is known). To facilitate this, modules imported using PyImport_ImportFrozenModule() have __origname__ set using the frozen module alias info. __origname__ is popped off during importlib bootstrap. (To be clear, even with this change the new code to set __file__ during fixups in importlib._bootstrap._setup() doesn't actually get triggered yet. This is because sys._stdlib_dir hasn't been set yet in interpreter initialization at the point importlib is bootstrapped. However, we do fix up such modules at that point to otherwise match the result of importing through FrozenImporter, just not the __file__ and __path__ parts. Setting those parts at that point will require changes in the order in which things happen during interpreter initialization. That can be addressed separately. Once it is, the file-related fixup code from this PR will kick in.)
Here are things this change does not do:
* set __file__ for non-stdlib modules (no way of knowing the parent dir)
* set __file__ if the stdlib dir is not known (nor assume the expense of finding it)
* relatedly, set __file__ if the stdlib is in a zip file
* verify that the filename set to __file__ actually exists (too expensive)
* update __path__ for frozen packages that alias a non-package (since there is no package dir)

Other things this change skips, but we may do later:
* set __file__ on modules imported using PyImport_ImportFrozenModule()
* set co_filename when we unmarshal the frozen code object while importing the module (e.g. in FrozenImporter.exec_module()) -- this would allow tracebacks to show source lines
* implement FrozenImporter.get_filename() and FrozenImporter.get_source()

https://bugs.python.org/issue21736
1 parent b2af211 commit 79cf20e

File tree

7 files changed

+240
-94
lines changed

7 files changed

+240
-94
lines changed

Lib/importlib/_bootstrap.py

Lines changed: 136 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,10 @@ def has_location(self, value):
421421

422422
def spec_from_loader(name, loader, *, origin=None, is_package=None):
423423
"""Return a module spec based on various loader methods."""
424-
if hasattr(loader, 'get_filename'):
424+
if origin is None:
425+
origin = getattr(loader, '_ORIGIN', None)
426+
427+
if not origin and hasattr(loader, 'get_filename'):
425428
if _bootstrap_external is None:
426429
raise NotImplementedError
427430
spec_from_file_location = _bootstrap_external.spec_from_file_location
@@ -467,12 +470,9 @@ def _spec_from_module(module, loader=None, origin=None):
467470
except AttributeError:
468471
location = None
469472
if origin is None:
470-
if location is None:
471-
try:
472-
origin = loader._ORIGIN
473-
except AttributeError:
474-
origin = None
475-
else:
473+
if loader is not None:
474+
origin = getattr(loader, '_ORIGIN', None)
475+
if not origin and location is not None:
476476
origin = location
477477
try:
478478
cached = module.__cached__
@@ -484,7 +484,7 @@ def _spec_from_module(module, loader=None, origin=None):
484484
submodule_search_locations = None
485485

486486
spec = ModuleSpec(name, loader, origin=origin)
487-
spec._set_fileattr = False if location is None else True
487+
spec._set_fileattr = False if location is None else (origin == location)
488488
spec.cached = cached
489489
spec.submodule_search_locations = submodule_search_locations
490490
return spec
@@ -541,6 +541,7 @@ def _init_module_attrs(spec, module, *, override=False):
541541
# __path__
542542
if override or getattr(module, '__path__', None) is None:
543543
if spec.submodule_search_locations is not None:
544+
# XXX We should extend __path__ if it's already a list.
544545
try:
545546
module.__path__ = spec.submodule_search_locations
546547
except AttributeError:
@@ -825,38 +826,127 @@ def module_repr(m):
825826
return '<module {!r} ({})>'.format(m.__name__, FrozenImporter._ORIGIN)
826827

827828
@classmethod
828-
def _setup_module(cls, module):
829-
assert not hasattr(module, '__file__'), module.__file__
830-
ispkg = hasattr(module, '__path__')
831-
assert not ispkg or not module.__path__, module.__path__
829+
def _fix_up_module(cls, module):
832830
spec = module.__spec__
833-
assert not ispkg or not spec.submodule_search_locations
831+
state = spec.loader_state
832+
if state is None:
833+
# The module is missing FrozenImporter-specific values.
834834

835-
if spec.loader_state is None:
836-
spec.loader_state = type(sys.implementation)(
837-
data=None,
838-
origname=None,
839-
)
840-
elif not hasattr(spec.loader_state, 'data'):
841-
spec.loader_state.data = None
842-
if not getattr(spec.loader_state, 'origname', None):
835+
# Fix up the spec attrs.
843836
origname = vars(module).pop('__origname__', None)
844837
assert origname, 'see PyImport_ImportFrozenModuleObject()'
845-
spec.loader_state.origname = origname
838+
ispkg = hasattr(module, '__path__')
839+
assert _imp.is_frozen_package(module.__name__) == ispkg, ispkg
840+
filename, pkgdir = cls._resolve_filename(origname, spec.name, ispkg)
841+
spec.loader_state = type(sys.implementation)(
842+
filename=filename,
843+
origname=origname,
844+
)
845+
__path__ = spec.submodule_search_locations
846+
if ispkg:
847+
assert __path__ == [], __path__
848+
if pkgdir:
849+
spec.submodule_search_locations.insert(0, pkgdir)
850+
else:
851+
assert __path__ is None, __path__
852+
853+
# Fix up the module attrs (the bare minimum).
854+
assert not hasattr(module, '__file__'), module.__file__
855+
if filename:
856+
try:
857+
module.__file__ = filename
858+
except AttributeError:
859+
pass
860+
if ispkg:
861+
if module.__path__ != __path__:
862+
assert module.__path__ == [], module.__path__
863+
module.__path__.extend(__path__)
864+
else:
865+
# These checks ensure that _fix_up_module() is only called
866+
# in the right places.
867+
__path__ = spec.submodule_search_locations
868+
ispkg = __path__ is not None
869+
# Check the loader state.
870+
assert sorted(vars(state)) == ['filename', 'origname'], state
871+
if state.origname:
872+
# The only frozen modules with "origname" set are stdlib modules.
873+
(__file__, pkgdir,
874+
) = cls._resolve_filename(state.origname, spec.name, ispkg)
875+
assert state.filename == __file__, (state.filename, __file__)
876+
if pkgdir:
877+
assert __path__ == [pkgdir], (__path__, pkgdir)
878+
else:
879+
assert __path__ == ([] if ispkg else None), __path__
880+
else:
881+
__file__ = None
882+
assert state.filename is None, state.filename
883+
assert __path__ == ([] if ispkg else None), __path__
884+
# Check the file attrs.
885+
if __file__:
886+
assert hasattr(module, '__file__')
887+
assert module.__file__ == __file__, (module.__file__, __file__)
888+
else:
889+
assert not hasattr(module, '__file__'), module.__file__
890+
if ispkg:
891+
assert hasattr(module, '__path__')
892+
assert module.__path__ == __path__, (module.__path__, __path__)
893+
else:
894+
assert not hasattr(module, '__path__'), module.__path__
895+
assert not spec.has_location
896+
897+
@classmethod
898+
def _resolve_filename(cls, fullname, alias=None, ispkg=False):
899+
if not fullname or not getattr(sys, '_stdlib_dir', None):
900+
return None, None
901+
try:
902+
sep = cls._SEP
903+
except AttributeError:
904+
sep = cls._SEP = '\\' if sys.platform == 'win32' else '/'
905+
906+
if fullname != alias:
907+
if fullname.startswith('<'):
908+
fullname = fullname[1:]
909+
if not ispkg:
910+
fullname = f'{fullname}.__init__'
911+
else:
912+
ispkg = False
913+
relfile = fullname.replace('.', sep)
914+
if ispkg:
915+
pkgdir = f'{sys._stdlib_dir}{sep}{relfile}'
916+
filename = f'{pkgdir}{sep}__init__.py'
917+
else:
918+
pkgdir = None
919+
filename = f'{sys._stdlib_dir}{sep}{relfile}.py'
920+
return filename, pkgdir
846921

847922
@classmethod
848923
def find_spec(cls, fullname, path=None, target=None):
849924
info = _call_with_frames_removed(_imp.find_frozen, fullname)
850925
if info is None:
851926
return None
852-
data, ispkg, origname = info
927+
# We get the marshaled data in exec_module() (the loader
928+
# part of the importer), instead of here (the finder part).
929+
# The loader is the usual place to get the data that will
930+
# be loaded into the module. (For example, see _LoaderBasics
931+
# in _bootstrap_external.py.) Most importantly, this importer
932+
# is simpler if we wait to get the data.
933+
# However, getting as much data in the finder as possible
934+
# to later load the module is okay, and sometimes important.
935+
# (That's why ModuleSpec.loader_state exists.) This is
936+
# especially true if it avoids throwing away expensive data
937+
# the loader would otherwise duplicate later and can be done
938+
# efficiently. In this case it isn't worth it.
939+
_, ispkg, origname = info
853940
spec = spec_from_loader(fullname, cls,
854941
origin=cls._ORIGIN,
855942
is_package=ispkg)
943+
filename, pkgdir = cls._resolve_filename(origname, fullname, ispkg)
856944
spec.loader_state = type(sys.implementation)(
857-
data=data,
945+
filename=filename,
858946
origname=origname,
859947
)
948+
if pkgdir:
949+
spec.submodule_search_locations.insert(0, pkgdir)
860950
return spec
861951

862952
@classmethod
@@ -873,26 +963,22 @@ def find_module(cls, fullname, path=None):
873963

874964
@staticmethod
875965
def create_module(spec):
876-
"""Use default semantics for module creation."""
966+
"""Set __file__, if able."""
967+
module = _new_module(spec.name)
968+
try:
969+
filename = spec.loader_state.filename
970+
except AttributeError:
971+
pass
972+
else:
973+
if filename:
974+
module.__file__ = filename
975+
return module
877976

878977
@staticmethod
879978
def exec_module(module):
880979
spec = module.__spec__
881980
name = spec.name
882-
try:
883-
data = spec.loader_state.data
884-
except AttributeError:
885-
if not _imp.is_frozen(name):
886-
raise ImportError('{!r} is not a frozen module'.format(name),
887-
name=name)
888-
data = None
889-
else:
890-
# We clear the extra data we got from the finder, to save memory.
891-
# Note that if this method is called again (e.g. by
892-
# importlib.reload()) then _imp.get_frozen_object() will notice
893-
# no data was provided and will look it up.
894-
spec.loader_state.data = None
895-
code = _call_with_frames_removed(_imp.get_frozen_object, name, data)
981+
code = _call_with_frames_removed(_imp.get_frozen_object, name)
896982
exec(code, module.__dict__)
897983

898984
@classmethod
@@ -903,7 +989,16 @@ def load_module(cls, fullname):
903989
904990
"""
905991
# Warning about deprecation implemented in _load_module_shim().
906-
return _load_module_shim(cls, fullname)
992+
module = _load_module_shim(cls, fullname)
993+
info = _imp.find_frozen(fullname)
994+
assert info is not None
995+
_, ispkg, origname = info
996+
module.__origname__ = origname
997+
vars(module).pop('__file__', None)
998+
if ispkg:
999+
module.__path__ = []
1000+
cls._fix_up_module(module)
1001+
return module
9071002

9081003
@classmethod
9091004
@_requires_frozen
@@ -1244,7 +1339,7 @@ def _setup(sys_module, _imp_module):
12441339
spec = _spec_from_module(module, loader)
12451340
_init_module_attrs(spec, module)
12461341
if loader is FrozenImporter:
1247-
loader._setup_module(module)
1342+
loader._fix_up_module(module)
12481343

12491344
# Directly load built-in modules needed during bootstrap.
12501345
self_module = sys.modules[__name__]

Lib/test/test_frozen.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,6 @@ def test_frozen_submodule_in_unfrozen_package(self):
3939
self.assertIs(spam.__spec__.loader,
4040
importlib.machinery.FrozenImporter)
4141

42-
# This is not possible until frozen packages have __path__ set properly.
43-
# See https://bugs.python.org/issue21736.
44-
@unittest.expectedFailure
4542
def test_unfrozen_submodule_in_frozen_package(self):
4643
with import_helper.CleanImport('__phello__', '__phello__.spam'):
4744
with import_helper.frozen_modules(enabled=True):

Lib/test/test_importlib/frozen/test_finder.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -44,30 +44,31 @@ def check_loader_state(self, spec, origname=None, filename=None):
4444
if not filename:
4545
if not origname:
4646
origname = spec.name
47+
filename = resolve_stdlib_file(origname)
4748

4849
actual = dict(vars(spec.loader_state))
4950

50-
# Check the code object used to import the frozen module.
51-
# We can't compare the marshaled data directly because
52-
# marshal.dumps() would mark "expected" (below) as a ref,
53-
# which slightly changes the output.
54-
# (See https://bugs.python.org/issue34093.)
55-
data = actual.pop('data')
56-
with import_helper.frozen_modules():
57-
expected = _imp.get_frozen_object(spec.name)
58-
code = marshal.loads(data)
59-
self.assertEqual(code, expected)
60-
6151
# Check the rest of spec.loader_state.
6252
expected = dict(
6353
origname=origname,
54+
filename=filename if origname else None,
6455
)
6556
self.assertDictEqual(actual, expected)
6657

6758
def check_search_locations(self, spec):
68-
# Frozen packages do not have any path entries.
69-
# (See https://bugs.python.org/issue21736.)
70-
expected = []
59+
"""This is only called when testing packages."""
60+
missing = object()
61+
filename = getattr(spec.loader_state, 'filename', missing)
62+
origname = getattr(spec.loader_state, 'origname', None)
63+
if not origname or filename is missing:
64+
# We deal with this in check_loader_state().
65+
return
66+
if not filename:
67+
expected = []
68+
elif origname != spec.name and not origname.startswith('<'):
69+
expected = []
70+
else:
71+
expected = [os.path.dirname(filename)]
7172
self.assertListEqual(spec.submodule_search_locations, expected)
7273

7374
def test_module(self):

0 commit comments

Comments
 (0)